Separate out some more ICU from base and into base/i18n.

This splits up string_util_icu and deletes the file. I moved the number formatting function into base/i18n/number_formatting (it now lives in the base namespace and returns string16 instead of std::wstring) and just removed the other function in string_util_icu, which was TrimWhitespaceUTF8. It is only used in a few places and isn't actually helpful (and the fact that it round-trips through UTF-16 is better for the caller to see). This also takes the sorting out of the FileEnumerator; the comment there says the ordering is not guaranteed. I moved the comparison into file_util_icu as a standalone function, LocaleAwareCompareFilenames, for callers of FileEnumerator to call manually if they need sorted results. I modified the directory lister to use this sorting instead, and filed a bug on doing more optimal JS-based sorting. TEST=none BUG=none Review URL: http://codereview.chromium.org/267001 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@28405 0039d316-1c4b-4281-b951-d872f2087c98
author: brettw@chromium.org <brettw@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2009-10-08 17:38:30 +0000
committer: brettw@chromium.org <brettw@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2009-10-08 17:38:30 +0000
commit: d0767cb54b2b5ee4d9cf00b3ee0fa585826b4036 (patch)
tree: 1c2ee733bf62a44c31dc11f76dad53243a84439f /base
parent: e91d532339c854ff0a082c6562a519647524fa66 (diff)
download: chromium_src-d0767cb54b2b5ee4d9cf00b3ee0fa585826b4036.zip
chromium_src-d0767cb54b2b5ee4d9cf00b3ee0fa585826b4036.tar.gz
chromium_src-d0767cb54b2b5ee4d9cf00b3ee0fa585826b4036.tar.bz2
12 files changed, 250 insertions, 317 deletions
diff --git a/base/base.gyp b/base/base.gyp
index 41108dd..570e9f0 100644
--- a/base/base.gyp
+++ b/base/base.gyp
@@ -121,7 +121,6 @@
         'file_path.h',
         'file_util.cc',
         'file_util.h',
-        'file_util_icu.cc',
         'file_util_linux.cc',
         'file_util_mac.mm',
         'file_util_posix.cc',
@@ -141,8 +140,12 @@
         'hmac_mac.cc',
         'hmac_nss.cc',
         'hmac_win.cc',
+        'i18n/file_util_icu.cc',
+        'i18n/file_util_icu.h',
         'i18n/icu_string_conversions.cc',
         'i18n/icu_string_conversions.h',
+        'i18n/number_formatting.cc',
+        'i18n/number_formatting.h',
         'iat_patch.cc',
         'iat_patch.h',
         'icu_util.cc',
@@ -284,7 +287,6 @@
         'string_tokenizer.h',
         'string_util.cc',
         'string_util.h',
-        'string_util_icu.cc',
         'string_util_win.h',
         'sys_info.h',
         'sys_info_chromeos.cc',
@@ -582,6 +584,7 @@
         'histogram_unittest.cc',
         'hmac_unittest.cc',
         'id_map_unittest.cc',
+        'i18n/file_util_icu_unittest.cc',
         'json_reader_unittest.cc',
         'json_writer_unittest.cc',
         'lazy_instance_unittest.cc',
diff --git a/base/file_util.h b/base/file_util.h
index 6474f048..37634b9 100644
--- a/base/file_util.h
+++ b/base/file_util.h
@@ -103,19 +103,6 @@ void InsertBeforeExtension(FilePath* path, const FilePath::StringType& suffix);
 void ReplaceExtension(FilePath* file_name,
                       const FilePath::StringType& extension);
 
-// Replaces characters in 'file_name' that are illegal for file names with
-// 'replace_char'. 'file_name' must not be a full or relative path, but just the
-// file name component. Any leading or trailing whitespace in 'file_name' is
-// removed.
-// Example:
-//   file_name == "bad:file*name?.txt", changed to: "bad-file-name-.txt" when
-//   'replace_char' is '-'.
-void ReplaceIllegalCharacters(std::wstring* file_name, int replace_char);
-
-// Returns true if file_name does not have any illegal character. The input
-// param has the same restriction as that for ReplaceIllegalCharacters.
-bool IsFilenameLegal(const string16& file_name);
-
 //-----------------------------------------------------------------------------
 // Functions that involve filesystem access or modification:
 
@@ -490,10 +477,6 @@ class FileEnumerator {
   static bool ReadDirectory(std::vector<DirectoryEntryInfo>* entries,
                             const FilePath& source, bool show_links);
 
-  // Comparison function to neatly sort directory entries
-  static bool CompareFiles(const DirectoryEntryInfo& a,
-                           const DirectoryEntryInfo& b);
-
   // The files in the current directory
   std::vector<DirectoryEntryInfo> directory_entries_;
 
@@ -501,7 +484,7 @@ class FileEnumerator {
   size_t current_directory_entry_;
 #endif
 
-  DISALLOW_EVIL_CONSTRUCTORS(FileEnumerator);
+  DISALLOW_COPY_AND_ASSIGN(FileEnumerator);
 };
 
 class MemoryMappedFile {
diff --git a/base/file_util_posix.cc b/base/file_util_posix.cc
index 27adbfa..4621bb3 100644
--- a/base/file_util_posix.cc
+++ b/base/file_util_posix.cc
@@ -35,56 +35,6 @@
 #include "base/sys_string_conversions.h"
 #include "base/time.h"
 #include "base/utf_string_conversions.h"
-#include "unicode/coll.h"
-
-
-namespace {
-
-class LocaleAwareComparator {
- public:
-  LocaleAwareComparator() {
-    UErrorCode error_code = U_ZERO_ERROR;
-    // Use the default collator. The default locale should have been properly
-    // set by the time this constructor is called.
-    collator_.reset(icu::Collator::createInstance(error_code));
-    DCHECK(U_SUCCESS(error_code));
-    // Make it case-sensitive.
-    collator_->setStrength(icu::Collator::TERTIARY);
-    // Note: We do not set UCOL_NORMALIZATION_MODE attribute. In other words, we
-    // do not pay performance penalty to guarantee sort order correctness for
-    // non-FCD (http://unicode.org/notes/tn5/#FCD) file names. This should be a
-    // reasonable tradeoff because such file names should be rare and the sort
-    // order doesn't change much anyway.
-  }
-
-  // Note: A similar function is available in l10n_util.
-  // We cannot use it because base should not depend on l10n_util.
-  // TODO(yuzo): Move some of l10n_util to base.
-  int Compare(const string16& a, const string16& b) {
-    // We are not sure if Collator::compare is thread-safe.
-    // Use an AutoLock just in case.
-    AutoLock auto_lock(lock_);
-
-    UErrorCode error_code = U_ZERO_ERROR;
-    UCollationResult result = collator_->compare(
-        static_cast<const UChar*>(a.c_str()),
-        static_cast<int>(a.length()),
-        static_cast<const UChar*>(b.c_str()),
-        static_cast<int>(b.length()),
-        error_code);
-    DCHECK(U_SUCCESS(error_code));
-    return result;
-  }
-
- private:
-  scoped_ptr<icu::Collator> collator_;
-  Lock lock_;
-  friend struct DefaultSingletonTraits<LocaleAwareComparator>;
-
-  DISALLOW_COPY_AND_ASSIGN(LocaleAwareComparator);
-};
-
-}  // namespace
 
 namespace file_util {
 
@@ -623,9 +573,6 @@ FilePath FileEnumerator::Next() {
     if (!ReadDirectory(&entries, root_path_, file_type_ & SHOW_SYM_LINKS))
       continue;
 
-    // The API says that order is not guaranteed, but order affects UX
-    std::sort(entries.begin(), entries.end(), CompareFiles);
-
     directory_entries_.clear();
     current_directory_entry_ = 0;
     for (std::vector<DirectoryEntryInfo>::const_iterator
@@ -691,23 +638,6 @@ bool FileEnumerator::ReadDirectory(std::vector<DirectoryEntryInfo>* entries,
   return true;
 }
 
-bool FileEnumerator::CompareFiles(const DirectoryEntryInfo& a,
-                                  const DirectoryEntryInfo& b) {
-  // Order lexicographically with directories before other files.
-  if (S_ISDIR(a.stat.st_mode) != S_ISDIR(b.stat.st_mode))
-    return S_ISDIR(a.stat.st_mode);
-
-  // On linux, the file system encoding is not defined. We assume
-  // SysNativeMBToWide takes care of it.
-  //
-  // ICU's collator can take strings in OS native encoding. But we convert the
-  // strings to UTF-16 ourselves to ensure conversion consistency.
-  // TODO(yuzo): Perhaps we should define SysNativeMBToUTF16?
-  return Singleton<LocaleAwareComparator>()->Compare(
-      WideToUTF16(base::SysNativeMBToWide(a.filename.value().c_str())),
-      WideToUTF16(base::SysNativeMBToWide(b.filename.value().c_str()))) < 0;
-}
-
 ///////////////////////////////////////////////
 // MemoryMappedFile
 
diff --git a/base/file_util_unittest.cc b/base/file_util_unittest.cc
index 5b606c9..b1f9fed 100644
--- a/base/file_util_unittest.cc
+++ b/base/file_util_unittest.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
@@ -876,41 +876,6 @@ TEST_F(FileUtilTest, DetectDirectoryTest) {
   EXPECT_TRUE(file_util::Delete(test_root, true));
 }
 
-static const struct goodbad_pair {
-  std::wstring bad_name;
-  std::wstring good_name;
-} kIllegalCharacterCases[] = {
-  {L"bad*file:name?.jpg", L"bad-file-name-.jpg"},
-  {L"**********::::.txt", L"--------------.txt"},
-  // We can't use UCNs (universal character names) for C0/C1 characters and
-  // U+007F, but \x escape is interpreted by MSVC and gcc as we intend.
-  {L"bad\x0003\x0091 file\u200E\u200Fname.png", L"bad-- file--name.png"},
-#if defined(OS_WIN)
-  {L"bad*file\\name.jpg", L"bad-file-name.jpg"},
-  {L"\t  bad*file\\name/.jpg ", L"bad-file-name-.jpg"},
-#elif defined(OS_POSIX)
-  {L"bad*file?name.jpg", L"bad-file-name.jpg"},
-  {L"\t  bad*file?name/.jpg ", L"bad-file-name-.jpg"},
-#endif
-  {L"this_file_name is okay!.mp3", L"this_file_name is okay!.mp3"},
-  {L"\u4E00\uAC00.mp3", L"\u4E00\uAC00.mp3"},
-  {L"\u0635\u200C\u0644.mp3", L"\u0635\u200C\u0644.mp3"},
-  {L"\U00010330\U00010331.mp3", L"\U00010330\U00010331.mp3"},
-  // Unassigned codepoints are ok.
-  {L"\u0378\U00040001.mp3", L"\u0378\U00040001.mp3"},
-  // Non-characters are not allowed.
-  {L"bad\uFFFFfile\U0010FFFEname.jpg ", L"bad-file-name.jpg"},
-  {L"bad\uFDD0file\uFDEFname.jpg ", L"bad-file-name.jpg"},
-};
-
-TEST_F(FileUtilTest, ReplaceIllegalCharactersTest) {
-  for (unsigned int i = 0; i < arraysize(kIllegalCharacterCases); ++i) {
-    std::wstring bad_name(kIllegalCharacterCases[i].bad_name);
-    file_util::ReplaceIllegalCharacters(&bad_name, L'-');
-    EXPECT_EQ(kIllegalCharacterCases[i].good_name, bad_name);
-  }
-}
-
 static const struct ReplaceExtensionCase {
   std::wstring file_name;
   FilePath::StringType extension;
@@ -1069,58 +1034,6 @@ TEST_F(FileUtilTest, FileEnumeratorTest) {
                                             // (we don't care what).
 }
 
-TEST_F(FileUtilTest, FileEnumeratorOrderTest) {
-  FilePath fileA = test_dir_.Append(FILE_PATH_LITERAL("a"));
-  FilePath fileB = test_dir_.Append(FILE_PATH_LITERAL("B"));
-  FilePath dirC = test_dir_.Append(FILE_PATH_LITERAL("C"));
-  FilePath dirD = test_dir_.Append(FILE_PATH_LITERAL("d"));
-  FilePath dirE = test_dir_.Append(FILE_PATH_LITERAL("e"));
-  FilePath fileF = test_dir_.Append(FILE_PATH_LITERAL("f"));
-
-  // Create files/directories in near random order.
-  CreateTextFile(fileF, L"");
-  CreateTextFile(fileA, L"");
-  CreateTextFile(fileB, L"");
-  EXPECT_TRUE(file_util::CreateDirectory(dirE));
-  EXPECT_TRUE(file_util::CreateDirectory(dirC));
-  EXPECT_TRUE(file_util::CreateDirectory(dirD));
-
-  // On Windows, files and directories are enumerated in the lexicographical
-  // order, ignoring case and whether they are files or directories. On posix,
-  // we order directories before files.
-  file_util::FileEnumerator enumerator(test_dir_, false, FILES_AND_DIRECTORIES);
-  FilePath cur_file = enumerator.Next();
-#if defined(OS_WIN)
-  EXPECT_EQ(fileA.value(), cur_file.value());
-  cur_file = enumerator.Next();
-  EXPECT_EQ(fileB.value(), cur_file.value());
-  cur_file = enumerator.Next();
-  EXPECT_EQ(dirC.value(), cur_file.value());
-  cur_file = enumerator.Next();
-  EXPECT_EQ(dirD.value(), cur_file.value());
-  cur_file = enumerator.Next();
-  EXPECT_EQ(dirE.value(), cur_file.value());
-  cur_file = enumerator.Next();
-  EXPECT_EQ(fileF.value(), cur_file.value());
-  cur_file = enumerator.Next();
-#elif defined(OS_POSIX)
-  EXPECT_EQ(dirC.value(), cur_file.value());
-  cur_file = enumerator.Next();
-  EXPECT_EQ(dirD.value(), cur_file.value());
-  cur_file = enumerator.Next();
-  EXPECT_EQ(dirE.value(), cur_file.value());
-  cur_file = enumerator.Next();
-  EXPECT_EQ(fileA.value(), cur_file.value());
-  cur_file = enumerator.Next();
-  EXPECT_EQ(fileB.value(), cur_file.value());
-  cur_file = enumerator.Next();
-  EXPECT_EQ(fileF.value(), cur_file.value());
-  cur_file = enumerator.Next();
-#endif
-
-  EXPECT_EQ(FILE_PATH_LITERAL(""), cur_file.value());
-}
-
 TEST_F(FileUtilTest, Contains) {
   FilePath data_dir = test_dir_.Append(FILE_PATH_LITERAL("FilePathTest"));
 
diff --git a/base/file_util_icu.cc b/base/i18n/file_util_icu.cc
index eeffa92..0bc9db6 100644
--- a/base/file_util_icu.cc
+++ b/base/i18n/file_util_icu.cc
@@ -2,21 +2,21 @@
 // Use of this source code is governed by a BSD-style license that can be
 // found in the LICENSE file.
 
-// File utilities that use the ICU library go in this file.  Functions using ICU
-// are separated from the other functions to prevent ICU being pulled in by the
-// linker if there is a false dependency.
-//
-// (The VS2005 linker finds such a false dependency and adds ~300K of ICU to
-// chrome.exe if this code lives in file_util.cc, even though none of this code
-// is called.)
+// File utilities that use the ICU library go in this file.
 
-#include "base/file_util.h"
+#include "base/i18n/file_util_icu.h"
 
+#include "base/file_path.h"
+#include "base/scoped_ptr.h"
 #include "base/singleton.h"
 #include "base/string_util.h"
+#include "base/sys_string_conversions.h"
+#include "build/build_config.h"
+#include "unicode/coll.h"
 #include "unicode/uniset.h"
 
 namespace {
+
 class IllegalCharacters {
  public:
   bool contains(UChar32 ucs4) {
@@ -72,6 +72,50 @@ IllegalCharacters::IllegalCharacters() {
   set->freeze();
 }
 
+class LocaleAwareComparator {
+ public:
+  LocaleAwareComparator() {
+    UErrorCode error_code = U_ZERO_ERROR;
+    // Use the default collator. The default locale should have been properly
+    // set by the time this constructor is called.
+    collator_.reset(icu::Collator::createInstance(error_code));
+    DCHECK(U_SUCCESS(error_code));
+    // Make it case-sensitive.
+    collator_->setStrength(icu::Collator::TERTIARY);
+    // Note: We do not set UCOL_NORMALIZATION_MODE attribute. In other words, we
+    // do not pay performance penalty to guarantee sort order correctness for
+    // non-FCD (http://unicode.org/notes/tn5/#FCD) file names. This should be a
+    // reasonable tradeoff because such file names should be rare and the sort
+    // order doesn't change much anyway.
+  }
+
+  // Note: A similar function is available in l10n_util.
+  // We cannot use it because base should not depend on l10n_util.
+  // TODO(yuzo): Move some of l10n_util to base.
+  int Compare(const string16& a, const string16& b) {
+    // We are not sure if Collator::compare is thread-safe.
+    // Use an AutoLock just in case.
+    AutoLock auto_lock(lock_);
+
+    UErrorCode error_code = U_ZERO_ERROR;
+    UCollationResult result = collator_->compare(
+        static_cast<const UChar*>(a.c_str()),
+        static_cast<int>(a.length()),
+        static_cast<const UChar*>(b.c_str()),
+        static_cast<int>(b.length()),
+        error_code);
+    DCHECK(U_SUCCESS(error_code));
+    return result;
+  }
+
+ private:
+  scoped_ptr<icu::Collator> collator_;
+  Lock lock_;
+  friend struct DefaultSingletonTraits<LocaleAwareComparator>;
+
+  DISALLOW_COPY_AND_ASSIGN(LocaleAwareComparator);
+};
+
 }  // namespace
 
 namespace file_util {
@@ -126,4 +170,24 @@ void ReplaceIllegalCharacters(std::wstring* file_name, int replace_char) {
 #endif
 }
 
+bool LocaleAwareCompareFilenames(const FilePath& a, const FilePath& b) {
+#if defined(OS_WIN)
+  return Singleton<LocaleAwareComparator>()->Compare(a.value().c_str(),
+                                                     b.value().c_str()) < 0;
+
+#elif defined(OS_POSIX)
+  // On linux, the file system encoding is not defined. We assume
+  // SysNativeMBToWide takes care of it.
+  //
+  // ICU's collator can take strings in OS native encoding. But we convert the
+  // strings to UTF-16 ourselves to ensure conversion consistency.
+  // TODO(yuzo): Perhaps we should define SysNativeMBToUTF16?
+  return Singleton<LocaleAwareComparator>()->Compare(
+      WideToUTF16(base::SysNativeMBToWide(a.value().c_str())),
+      WideToUTF16(base::SysNativeMBToWide(b.value().c_str()))) < 0;
+#else
+  #error Not implemented on your system
+#endif
+}
+
 }  // namespace
diff --git a/base/i18n/file_util_icu.h b/base/i18n/file_util_icu.h
new file mode 100644
index 0000000..c309a9e
--- /dev/null
+++ b/base/i18n/file_util_icu.h
@@ -0,0 +1,33 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// File utilities that use the ICU library go in this file.
+
+#include <string>
+
+#include "base/string16.h"
+
+class FilePath;
+
+namespace file_util {
+
+// Returns true if file_name does not have any illegal character. The input
+// param has the same restriction as that for ReplaceIllegalCharacters.
+bool IsFilenameLegal(const string16& file_name);
+
+// Replaces characters in 'file_name' that are illegal for file names with
+// 'replace_char'. 'file_name' must not be a full or relative path, but just the
+// file name component. Any leading or trailing whitespace in 'file_name' is
+// removed.
+// Example:
+//   file_name == "bad:file*name?.txt", changed to: "bad-file-name-.txt" when
+//   'replace_char' is '-'.
+void ReplaceIllegalCharacters(std::wstring* file_name, int replace_char);
+
+// Compares two filenames using the current locale information. This can be
+// used to sort directory listings. It behaves like "operator<" for use in
+// std::sort.
+bool LocaleAwareCompareFilenames(const FilePath& a, const FilePath& b);
+
+}  // namespace file_util
diff --git a/base/i18n/file_util_icu_unittest.cc b/base/i18n/file_util_icu_unittest.cc
new file mode 100644
index 0000000..aebcd0df
--- /dev/null
+++ b/base/i18n/file_util_icu_unittest.cc
@@ -0,0 +1,71 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/i18n/file_util_icu.h"
+
+#include "base/file_util.h"
+#include "base/path_service.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "testing/platform_test.h"
+
+// file_util winds up using autoreleased objects on the Mac, so this needs
+// to be a PlatformTest
+class FileUtilICUTest : public PlatformTest {
+ protected:
+  virtual void SetUp() {
+    PlatformTest::SetUp();
+    // Name a subdirectory of the temp directory.
+    ASSERT_TRUE(PathService::Get(base::DIR_TEMP, &test_dir_));
+    test_dir_ = test_dir_.Append(FILE_PATH_LITERAL("FileUtilTest"));
+
+    // Create a fresh, empty copy of this directory.
+    file_util::Delete(test_dir_, true);
+    file_util::CreateDirectory(test_dir_);
+  }
+  virtual void TearDown() {
+    PlatformTest::TearDown();
+    // Clean up test directory
+    ASSERT_TRUE(file_util::Delete(test_dir_, true));
+    ASSERT_FALSE(file_util::PathExists(test_dir_));
+  }
+
+  // the path to temporary directory used to contain the test operations
+  FilePath test_dir_;
+};
+
+static const struct goodbad_pair {
+  std::wstring bad_name;
+  std::wstring good_name;
+} kIllegalCharacterCases[] = {
+  {L"bad*file:name?.jpg", L"bad-file-name-.jpg"},
+  {L"**********::::.txt", L"--------------.txt"},
+  // We can't use UCNs (universal character names) for C0/C1 characters and
+  // U+007F, but \x escape is interpreted by MSVC and gcc as we intend.
+  {L"bad\x0003\x0091 file\u200E\u200Fname.png", L"bad-- file--name.png"},
+#if defined(OS_WIN)
+  {L"bad*file\\name.jpg", L"bad-file-name.jpg"},
+  {L"\t  bad*file\\name/.jpg ", L"bad-file-name-.jpg"},
+#elif defined(OS_POSIX)
+  {L"bad*file?name.jpg", L"bad-file-name.jpg"},
+  {L"\t  bad*file?name/.jpg ", L"bad-file-name-.jpg"},
+#endif
+  {L"this_file_name is okay!.mp3", L"this_file_name is okay!.mp3"},
+  {L"\u4E00\uAC00.mp3", L"\u4E00\uAC00.mp3"},
+  {L"\u0635\u200C\u0644.mp3", L"\u0635\u200C\u0644.mp3"},
+  {L"\U00010330\U00010331.mp3", L"\U00010330\U00010331.mp3"},
+  // Unassigned codepoints are ok.
+  {L"\u0378\U00040001.mp3", L"\u0378\U00040001.mp3"},
+  // Non-characters are not allowed.
+  {L"bad\uFFFFfile\U0010FFFEname.jpg ", L"bad-file-name.jpg"},
+  {L"bad\uFDD0file\uFDEFname.jpg ", L"bad-file-name.jpg"},
+};
+
+TEST_F(FileUtilICUTest, ReplaceIllegalCharactersTest) {
+  for (unsigned int i = 0; i < arraysize(kIllegalCharacterCases); ++i) {
+    std::wstring bad_name(kIllegalCharacterCases[i].bad_name);
+    file_util::ReplaceIllegalCharacters(&bad_name, L'-');
+    EXPECT_EQ(kIllegalCharacterCases[i].good_name, bad_name);
+  }
+}
+
diff --git a/base/i18n/number_formatting.cc b/base/i18n/number_formatting.cc
new file mode 100644
index 0000000..fef1b7d
--- /dev/null
+++ b/base/i18n/number_formatting.cc
@@ -0,0 +1,48 @@
+// Copyright (c) 2008 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/i18n/number_formatting.h"
+
+#include "base/logging.h"
+#include "base/singleton.h"
+#include "base/string_util.h"
+#include "base/utf_string_conversions.h"
+#include "unicode/numfmt.h"
+#include "unicode/ustring.h"
+
+namespace base {
+
+namespace {
+
+struct NumberFormatSingletonTraits
+    : public DefaultSingletonTraits<icu::NumberFormat> {
+  static icu::NumberFormat* New() {
+    UErrorCode status = U_ZERO_ERROR;
+    icu::NumberFormat* formatter = icu::NumberFormat::createInstance(status);
+    DCHECK(U_SUCCESS(status));
+    return formatter;
+  }
+  // There's no ICU call to destroy a NumberFormat object other than
+  // operator delete, so use the default Delete, which calls operator delete.
+  // This can cause problems if a different allocator is used by this file than
+  // by ICU.
+};
+
+}  // namespace
+
+string16 FormatNumber(int64 number) {
+  icu::NumberFormat* number_format =
+      Singleton<icu::NumberFormat, NumberFormatSingletonTraits>::get();
+
+  if (!number_format) {
+    // As a fallback, just return the raw number in a string.
+    return UTF8ToUTF16(StringPrintf("%lld", number));
+  }
+  icu::UnicodeString ustr;
+  number_format->format(number, ustr);
+
+  return string16(ustr.getBuffer(), static_cast<size_t>(ustr.length()));
+}
+
+}  // namespace base
diff --git a/base/i18n/number_formatting.h b/base/i18n/number_formatting.h
new file mode 100644
index 0000000..9fa2b18
--- /dev/null
+++ b/base/i18n/number_formatting.h
@@ -0,0 +1,19 @@
+// Copyright (c) 2008 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef BASE_I18N_NUMBER_FORMATTING_H_
+#define BASE_I18N_NUMBER_FORMATTING_H_
+
+#include <string>
+
+#include "base/basictypes.h"
+#include "base/string16.h"
+
+namespace base {
+
+string16 FormatNumber(int64 number);
+
+}  // namespace base
+
+#endif  // BASE_I18N_NUMBER_FORMATTING_H_
diff --git a/base/string_util.h b/base/string_util.h
index 254e18f..c6b9fb1 100644
--- a/base/string_util.h
+++ b/base/string_util.h
@@ -140,8 +140,6 @@ bool TrimString(const std::string& input,
 // The non-wide version has two functions:
 // * TrimWhitespaceASCII()
 //   This function is for ASCII strings and only looks for ASCII whitespace;
-// * TrimWhitespaceUTF8()
-//   This function is for UTF-8 strings and looks for Unicode whitespace.
 // Please choose the best one according to your usage.
 // NOTE: Safe to use the same variable for both input and output.
 enum TrimPositions {
@@ -156,9 +154,6 @@ TrimPositions TrimWhitespace(const std::wstring& input,
 TrimPositions TrimWhitespaceASCII(const std::string& input,
                                   TrimPositions positions,
                                   std::string* output);
-TrimPositions TrimWhitespaceUTF8(const std::string& input,
-                                 TrimPositions positions,
-                                 std::string* output);
 
 // Deprecated. This function is only for backward compatibility and calls
 // TrimWhitespaceASCII().
diff --git a/base/string_util_icu.cc b/base/string_util_icu.cc
deleted file mode 100644
index 68fbd10..0000000
--- a/base/string_util_icu.cc
+++ /dev/null
@@ -1,80 +0,0 @@
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "base/string_util.h"
-
-#include <string.h>
-#include <vector>
-
-#include "base/basictypes.h"
-#include "base/logging.h"
-#include "base/singleton.h"
-#include "unicode/numfmt.h"
-#include "unicode/ustring.h"
-
-// Number formatting -----------------------------------------------------------
-
-namespace {
-
-struct NumberFormatSingletonTraits
-    : public DefaultSingletonTraits<icu::NumberFormat> {
-  static icu::NumberFormat* New() {
-    UErrorCode status = U_ZERO_ERROR;
-    icu::NumberFormat* formatter = icu::NumberFormat::createInstance(status);
-    DCHECK(U_SUCCESS(status));
-    return formatter;
-  }
-  // There's no ICU call to destroy a NumberFormat object other than
-  // operator delete, so use the default Delete, which calls operator delete.
-  // This can cause problems if a different allocator is used by this file than
-  // by ICU.
-};
-
-}  // namespace
-
-std::wstring FormatNumber(int64 number) {
-  icu::NumberFormat* number_format =
-      Singleton<icu::NumberFormat, NumberFormatSingletonTraits>::get();
-
-  if (!number_format) {
-    // As a fallback, just return the raw number in a string.
-    return StringPrintf(L"%lld", number);
-  }
-  icu::UnicodeString ustr;
-  number_format->format(number, ustr);
-
-#if defined(WCHAR_T_IS_UTF16)
-  return std::wstring(ustr.getBuffer(),
-                      static_cast<std::wstring::size_type>(ustr.length()));
-#elif defined(WCHAR_T_IS_UTF32)
-  wchar_t buffer[64];  // A int64 is less than 20 chars long,  so 64 chars
-                       // leaves plenty of room for formating stuff.
-  int length = 0;
-  UErrorCode error = U_ZERO_ERROR;
-  u_strToWCS(buffer, 64, &length, ustr.getBuffer(), ustr.length() , &error);
-  if (U_FAILURE(error)) {
-    NOTREACHED();
-    // As a fallback, just return the raw number in a string.
-    return StringPrintf(L"%lld", number);
-  }
-  return std::wstring(buffer, static_cast<std::wstring::size_type>(length));
-#endif  // defined(WCHAR_T_IS_UTF32)
-}
-
-// Although this function isn't specific to ICU, we implemented it here so
-// that chrome.exe won't pull it in.  Moving this function to string_util.cc
-// causes chrome.exe to grow by 400k because of more ICU being pulled in.
-TrimPositions TrimWhitespaceUTF8(const std::string& input,
-                                 TrimPositions positions,
-                                 std::string* output) {
-  // This implementation is not so fast since it converts the text encoding
-  // twice. Please feel free to file a bug if this function hurts the
-  // performance of Chrome.
-  DCHECK(IsStringUTF8(input));
-  std::wstring input_wide = UTF8ToWide(input);
-  std::wstring output_wide;
-  TrimPositions result = TrimWhitespace(input_wide, positions, &output_wide);
-  *output = WideToUTF8(output_wide);
-  return result;
-}
diff --git a/base/string_util_unittest.cc b/base/string_util_unittest.cc
index 2723541..a70b03e 100644
--- a/base/string_util_unittest.cc
+++ b/base/string_util_unittest.cc
@@ -100,52 +100,6 @@ TEST(StringUtilTest, TrimWhitespace) {
   }
 }
 
-static const struct trim_case_utf8 {
-  const char* input;
-  const TrimPositions positions;
-  const char* output;
-  const TrimPositions return_value;
-} trim_cases_utf8[] = {
-  // UTF-8 strings that start (and end) with Unicode space characters
-  // (including zero-width spaces).
-  {"\xE2\x80\x80Test String\xE2\x80\x81", TRIM_ALL, "Test String", TRIM_ALL},
-  {"\xE2\x80\x82Test String\xE2\x80\x83", TRIM_ALL, "Test String", TRIM_ALL},
-  {"\xE2\x80\x84Test String\xE2\x80\x85", TRIM_ALL, "Test String", TRIM_ALL},
-  {"\xE2\x80\x86Test String\xE2\x80\x87", TRIM_ALL, "Test String", TRIM_ALL},
-  {"\xE2\x80\x88Test String\xE2\x80\x8A", TRIM_ALL, "Test String", TRIM_ALL},
-  {"\xE3\x80\x80Test String\xE3\x80\x80", TRIM_ALL, "Test String", TRIM_ALL},
-  // UTF-8 strings that end with 0x85 (NEL in ISO-8859).
-  {"\xD0\x85", TRIM_TRAILING, "\xD0\x85", TRIM_NONE},
-  {"\xD9\x85", TRIM_TRAILING, "\xD9\x85", TRIM_NONE},
-  {"\xEC\x97\x85", TRIM_TRAILING, "\xEC\x97\x85", TRIM_NONE},
-  {"\xF0\x90\x80\x85", TRIM_TRAILING, "\xF0\x90\x80\x85", TRIM_NONE},
-  // UTF-8 strings that end with 0xA0 (non-break space in ISO-8859-1).
-  {"\xD0\xA0", TRIM_TRAILING, "\xD0\xA0", TRIM_NONE},
-  {"\xD9\xA0", TRIM_TRAILING, "\xD9\xA0", TRIM_NONE},
-  {"\xEC\x97\xA0", TRIM_TRAILING, "\xEC\x97\xA0", TRIM_NONE},
-  {"\xF0\x90\x80\xA0", TRIM_TRAILING, "\xF0\x90\x80\xA0", TRIM_NONE},
-};
-
-TEST(StringUtilTest, TrimWhitespaceUTF8) {
-  std::string output_ascii;
-  for (size_t i = 0; i < arraysize(trim_cases_ascii); ++i) {
-    const trim_case_ascii& value = trim_cases_ascii[i];
-    EXPECT_EQ(value.return_value,
-              TrimWhitespaceASCII(value.input, value.positions, &output_ascii));
-    EXPECT_EQ(value.output, output_ascii);
-  }
-
-  // Test that TrimWhiteSpaceUTF8() can remove Unicode space characters and
-  // prevent from removing UTF-8 characters that end with an ISO-8859 NEL.
-  std::string output_utf8;
-  for (size_t i = 0; i < arraysize(trim_cases_utf8); ++i) {
-    const trim_case_utf8& value = trim_cases_utf8[i];
-    EXPECT_EQ(value.return_value,
-              TrimWhitespaceUTF8(value.input, value.positions, &output_utf8));
-    EXPECT_EQ(value.output, output_utf8);
-  }
-}
-
 static const struct collapse_case {
   const wchar_t* input;
   const bool trim;
author	brettw@chromium.org <brettw@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2009-10-08 17:38:30 +0000
committer	brettw@chromium.org <brettw@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2009-10-08 17:38:30 +0000
commit	d0767cb54b2b5ee4d9cf00b3ee0fa585826b4036 (patch)
tree	1c2ee733bf62a44c31dc11f76dad53243a84439f /base
parent	e91d532339c854ff0a082c6562a519647524fa66 (diff)
download	chromium_src-d0767cb54b2b5ee4d9cf00b3ee0fa585826b4036.zip chromium_src-d0767cb54b2b5ee4d9cf00b3ee0fa585826b4036.tar.gz chromium_src-d0767cb54b2b5ee4d9cf00b3ee0fa585826b4036.tar.bz2