Move more ICU-dependent stuff from base into base/i18n. Some test stuff also

depended on this, so to make the DEPS work out, I moved the testing-related files into a new directory, base/test. TEST=none BUG=none Review URL: http://codereview.chromium.org/266038 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@28569 0039d316-1c4b-4281-b951-d872f2087c98
author: brettw@chromium.org <brettw@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2009-10-09 18:20:30 +0000
committer: brettw@chromium.org <brettw@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2009-10-09 18:20:30 +0000
commit: fb895c694e2117c29b6afb699095f6e187a44da7 (patch)
tree: 04a0d1434a470f55f0e639a3e6f15c18416d80e2 /base/i18n
parent: 8ecb6aa0a92d5426c2c98c23e0e3f3c4f06972c5 (diff)
download: chromium_src-fb895c694e2117c29b6afb699095f6e187a44da7.zip
chromium_src-fb895c694e2117c29b6afb699095f6e187a44da7.tar.gz
chromium_src-fb895c694e2117c29b6afb699095f6e187a44da7.tar.bz2
7 files changed, 469 insertions, 0 deletions
diff --git a/base/i18n/icu_util.cc b/base/i18n/icu_util.cc
new file mode 100644
index 0000000..6239a01
--- /dev/null
+++ b/base/i18n/icu_util.cc
@@ -0,0 +1,99 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/i18n/icu_util.h"
+
+#include "build/build_config.h"
+
+#if defined(OS_WIN)
+#include <windows.h>
+#endif
+
+#include <string>
+
+#include "base/file_path.h"
+#include "base/file_util.h"
+#include "base/logging.h"
+#include "base/path_service.h"
+#include "base/string_util.h"
+#include "base/sys_string_conversions.h"
+#include "unicode/putil.h"
+#include "unicode/udata.h"
+
+#define ICU_UTIL_DATA_FILE   0
+#define ICU_UTIL_DATA_SHARED 1
+#define ICU_UTIL_DATA_STATIC 2
+
+#ifndef ICU_UTIL_DATA_IMPL
+
+#if defined(OS_WIN)
+#define ICU_UTIL_DATA_IMPL ICU_UTIL_DATA_SHARED
+#elif defined(OS_MACOSX)
+#define ICU_UTIL_DATA_IMPL ICU_UTIL_DATA_STATIC
+#elif defined(OS_LINUX)
+#define ICU_UTIL_DATA_IMPL ICU_UTIL_DATA_FILE
+#endif
+
+#endif  // ICU_UTIL_DATA_IMPL
+
+#if defined(OS_WIN)
+#define ICU_UTIL_DATA_SYMBOL "icudt" U_ICU_VERSION_SHORT "_dat"
+#define ICU_UTIL_DATA_SHARED_MODULE_NAME "icudt" U_ICU_VERSION_SHORT ".dll"
+#endif
+
+namespace icu_util {
+
+// Loads ICU's data for the current process.  The strategy is chosen at compile
+// time through ICU_UTIL_DATA_IMPL:
+//   ICU_UTIL_DATA_SHARED: load the ICU data DLL found next to the current
+//                         module and pass its data symbol to ICU.
+//   ICU_UTIL_DATA_STATIC: nothing is loaded at run time.
+//   ICU_UTIL_DATA_FILE:   point ICU at the executable's directory and make it
+//                         look only for packaged data files.
+// Returns true on success, false if a path lookup, the DLL load, the symbol
+// lookup or an ICU call fails.
+bool Initialize() {
+#ifndef NDEBUG
+  // Assert that we are not called more than once.  Even though calling this
+  // function isn't harmful (ICU can handle it), being called twice probably
+  // indicates a programming error.
+  static bool called_once = false;
+  DCHECK(!called_once);
+  called_once = true;
+#endif
+
+#if (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_SHARED)
+  // We expect to find the ICU data module alongside the current module.
+  std::wstring data_path;
+  if (!PathService::Get(base::DIR_MODULE, &data_path)) {
+    // Without the module directory there is no path to load the DLL from.
+    LOG(ERROR) << "Failed to find the directory of the current module";
+    return false;
+  }
+  file_util::AppendToPath(&data_path,
+                          ASCIIToWide(ICU_UTIL_DATA_SHARED_MODULE_NAME));
+
+  HMODULE module = LoadLibrary(data_path.c_str());
+  if (!module) {
+    LOG(ERROR) << "Failed to load " << ICU_UTIL_DATA_SHARED_MODULE_NAME;
+    return false;
+  }
+
+  FARPROC addr = GetProcAddress(module, ICU_UTIL_DATA_SYMBOL);
+  if (!addr) {
+    LOG(ERROR) << ICU_UTIL_DATA_SYMBOL << ": not found in "
+               << ICU_UTIL_DATA_SHARED_MODULE_NAME;
+    return false;
+  }
+
+  UErrorCode err = U_ZERO_ERROR;
+  udata_setCommonData(reinterpret_cast<void*>(addr), &err);
+  return err == U_ZERO_ERROR;
+#elif (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_STATIC)
+  // Mac bundles the ICU data in.
+  return true;
+#elif (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_FILE)
+  // For now, expect the data file to be alongside the executable.
+  // This is sufficient while we work on unit tests, but will eventually
+  // likely live in a data directory.
+  FilePath data_path;
+  if (!PathService::Get(base::DIR_EXE, &data_path)) {
+    // Still fatal in debug builds, but release builds now report the failure
+    // instead of handing ICU an unset path.
+    NOTREACHED() << "Failed to find the directory of the executable";
+    return false;
+  }
+  u_setDataDirectory(data_path.value().c_str());
+  // Only look for the packaged data file;
+  // the default behavior is to look for individual files.
+  UErrorCode err = U_ZERO_ERROR;
+  udata_setFileAccess(UDATA_ONLY_PACKAGES, &err);
+  return err == U_ZERO_ERROR;
+#endif
+}
+
+}  // namespace icu_util
diff --git a/base/i18n/icu_util.h b/base/i18n/icu_util.h
new file mode 100644
index 0000000..56eaa37
--- /dev/null
+++ b/base/i18n/icu_util.h
@@ -0,0 +1,16 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef BASE_I18N_ICU_UTIL_H_
+#define BASE_I18N_ICU_UTIL_H_
+
+namespace icu_util {
+
+// Loads ICU's data tables for the current process.  Call this before ICU is
+// used.  Returns true on success, false if the data could not be set up.
+bool Initialize();
+
+}  // namespace icu_util
+
+#endif  // BASE_I18N_ICU_UTIL_H_
diff --git a/base/i18n/time_formatting.cc b/base/i18n/time_formatting.cc
new file mode 100644
index 0000000..f031cf7
--- /dev/null
+++ b/base/i18n/time_formatting.cc
@@ -0,0 +1,72 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/i18n/time_formatting.h"
+
+#include "base/logging.h"
+#include "base/scoped_ptr.h"
+#include "base/string_util.h"
+#include "base/time.h"
+#include "unicode/datefmt.h"
+
+using base::Time;
+
+namespace {
+
+// Formats |time| with |formatter| and returns the text as a wide string.
+// |formatter| must not be NULL.
+std::wstring TimeFormat(const icu::DateFormat* formatter,
+                        const Time& time) {
+  DCHECK(formatter);
+
+  // ICU is handed the ToDoubleT() value scaled by 1000.
+  const UDate icu_date = static_cast<UDate>(time.ToDoubleT() * 1000);
+  icu::UnicodeString formatted;
+  formatter->format(icu_date, formatted);
+
+  // Convert ICU's UTF-16 buffer into the wide string callers expect.
+  std::wstring result;
+  bool success = UTF16ToWide(formatted.getBuffer(), formatted.length(),
+                             &result);
+  DCHECK(success);
+  return result;
+}
+
+}
+
+namespace base {
+
+// Each function below builds an ICU formatter for one date/time style and
+// delegates the actual formatting to TimeFormat().  No locale is ever passed,
+// so ICU's default locale is used.
+
+std::wstring TimeFormatTimeOfDay(const Time& time) {
+  // We can omit the locale parameter because the default should match
+  // Chrome's application locale.
+  scoped_ptr<icu::DateFormat> time_format(
+      icu::DateFormat::createTimeInstance(icu::DateFormat::kShort));
+  return TimeFormat(time_format.get(), time);
+}
+
+std::wstring TimeFormatShortDate(const Time& time) {
+  // Note: the "short" date uses ICU's medium style.
+  scoped_ptr<icu::DateFormat> date_format(
+      icu::DateFormat::createDateInstance(icu::DateFormat::kMedium));
+  return TimeFormat(date_format.get(), time);
+}
+
+std::wstring TimeFormatShortDateNumeric(const Time& time) {
+  scoped_ptr<icu::DateFormat> date_format(
+      icu::DateFormat::createDateInstance(icu::DateFormat::kShort));
+  return TimeFormat(date_format.get(), time);
+}
+
+std::wstring TimeFormatShortDateAndTime(const Time& time) {
+  // Only the date style is given; the time style is left at ICU's default.
+  scoped_ptr<icu::DateFormat> date_time_format(
+      icu::DateFormat::createDateTimeInstance(icu::DateFormat::kShort));
+  return TimeFormat(date_time_format.get(), time);
+}
+
+std::wstring TimeFormatFriendlyDateAndTime(const Time& time) {
+  // Only the date style is given; the time style is left at ICU's default.
+  scoped_ptr<icu::DateFormat> date_time_format(
+      icu::DateFormat::createDateTimeInstance(icu::DateFormat::kFull));
+  return TimeFormat(date_time_format.get(), time);
+}
+
+std::wstring TimeFormatFriendlyDate(const Time& time) {
+  scoped_ptr<icu::DateFormat> date_format(
+      icu::DateFormat::createDateInstance(icu::DateFormat::kFull));
+  return TimeFormat(date_format.get(), time);
+}
+
+}  // namespace base
diff --git a/base/i18n/time_formatting.h b/base/i18n/time_formatting.h
new file mode 100644
index 0000000..dd623af
--- /dev/null
+++ b/base/i18n/time_formatting.h
@@ -0,0 +1,40 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Basic time formatting methods.  These methods use the current locale
+// formatting for displaying the time.
+
+#ifndef BASE_I18N_TIME_FORMATTING_H_
+#define BASE_I18N_TIME_FORMATTING_H_
+
+#include <string>
+
+namespace base {
+
+class Time;
+
+// Returns the time of day, e.g., "3:07 PM".
+std::wstring TimeFormatTimeOfDay(const Time& time);
+
+// Returns a shortened date, e.g. "Nov 7, 2007"
+std::wstring TimeFormatShortDate(const Time& time);
+
+// Returns a numeric date such as 12/13/52.
+std::wstring TimeFormatShortDateNumeric(const Time& time);
+
+// Returns a numeric date and time, e.g.
+// "12/13/52 2:44:30 PM".
+std::wstring TimeFormatShortDateAndTime(const Time& time);
+
+// Formats a time in a friendly sentence format, e.g.
+// "Monday, March 6, 2008 2:44:30 PM".
+std::wstring TimeFormatFriendlyDateAndTime(const Time& time);
+
+// Formats a time in a friendly sentence format, e.g.
+// "Monday, March 6, 2008".
+std::wstring TimeFormatFriendlyDate(const Time& time);
+
+}  // namespace base
+
+#endif  // BASE_I18N_TIME_FORMATTING_H_
diff --git a/base/i18n/word_iterator.cc b/base/i18n/word_iterator.cc
new file mode 100644
index 0000000..45a06b9
--- /dev/null
+++ b/base/i18n/word_iterator.cc
@@ -0,0 +1,87 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/i18n/word_iterator.h"
+
+#include "base/logging.h"
+#include "unicode/ubrk.h"
+#include "unicode/ustring.h"
+
+// Sentinel position: the initial value of prev_, and the value pos_ takes once
+// Advance() has run past the end of the string.
+const size_t npos = -1;
+
+// Binds the iterator to |str| (string_ is a reference, not a copy) and records
+// the requested break type.  The ICU iterator is not opened here; iter_ stays
+// NULL until Init() is called.
+WordIterator::WordIterator(const std::wstring& str, BreakType break_type)
+    : iter_(NULL),
+      string_(str),
+      break_type_(break_type),
+      prev_(npos),
+      pos_(0) {
+}
+
+// Closes the ICU break iterator, if one was ever opened.
+WordIterator::~WordIterator() {
+  // iter_ is still NULL when no iterator was opened: nothing to release.
+  if (!iter_)
+    return;
+  ubrk_close(iter_);
+}
+
+// Opens the ICU break iterator over |string_| using the rule set selected by
+// |break_type_| and positions it at the start of the text.  Returns false if
+// the text cannot be converted to UTF-16 (only relevant when wchar_t is not
+// UTF-16) or if ICU fails to open the iterator.
+bool WordIterator::Init() {
+  UErrorCode status = U_ZERO_ERROR;
+  UBreakIteratorType break_type;
+  switch (break_type_) {
+    case BREAK_WORD:
+      break_type = UBRK_WORD;
+      break;
+    case BREAK_LINE:
+      break_type = UBRK_LINE;
+      break;
+    default:
+      NOTREACHED();
+      break_type = UBRK_LINE;
+  }
+#if defined(WCHAR_T_IS_UTF16)
+  iter_ = ubrk_open(break_type, NULL,
+                    string_.data(), static_cast<int32_t>(string_.size()),
+                    &status);
+#else  // WCHAR_T_IS_UTF16
+  // When wchar_t is wider than UChar (16 bits), transform |string_| into a
+  // UChar* string.  Size the UChar* buffer to be large enough to hold twice
+  // as many UTF-16 code points as there are UCS-4 characters, in case each
+  // character translates to a UTF-16 surrogate pair, and leave room for a NUL
+  // terminator.
+  // NOTE(review): ICU reports break offsets as indices into |chars_|, while
+  // Advance()/GetWord() apply them to |string_|.  The two only agree as long
+  // as no character needs a surrogate pair -- confirm this is acceptable.
+  // TODO(avi): avoid this alloc
+  chars_.resize(string_.length() * 2 + 1);
+
+  UErrorCode error = U_ZERO_ERROR;
+  int32_t destLength = 0;
+  u_strFromWCS(&chars_[0], static_cast<int32_t>(chars_.size()), &destLength,
+               string_.data(), static_cast<int32_t>(string_.length()),
+               &error);
+  if (U_FAILURE(error)) {
+    // Do not hand an unconverted buffer and a meaningless length to ICU.
+    NOTREACHED() << "u_strFromWCS failed";
+    return false;
+  }
+
+  iter_ = ubrk_open(break_type, NULL, &chars_[0], destLength, &status);
+#endif
+  if (U_FAILURE(status)) {
+    NOTREACHED() << "ubrk_open failed";
+    return false;
+  }
+  ubrk_first(iter_);  // Move the iterator to the beginning of the string.
+  return true;
+}
+
+// Moves to the next break: prev_ takes the old pos_, and pos_ takes the new
+// break offset.  Returns false (with pos_ set to npos) once ICU reports that
+// there are no more breaks.
+bool WordIterator::Advance() {
+  prev_ = pos_;
+  const int32_t next_break = ubrk_next(iter_);
+  if (next_break == UBRK_DONE) {
+    pos_ = npos;
+    return false;
+  }
+  pos_ = static_cast<size_t>(next_break);
+  return true;
+}
+
+// Returns true unless ICU's rule status for the most recent break is
+// UBRK_WORD_NONE.
+bool WordIterator::IsWord() const {
+  const int32_t rule_status = ubrk_getRuleStatus(iter_);
+  return rule_status != UBRK_WORD_NONE;
+}
+
+// Returns the slice of |string_| in [prev_, pos_).  Both positions must be
+// valid, i.e. neither may be npos.
+std::wstring WordIterator::GetWord() const {
+  DCHECK(prev_ != npos && pos_ != npos);
+  const size_t length = pos_ - prev_;
+  return string_.substr(prev_, length);
+}
diff --git a/base/i18n/word_iterator.h b/base/i18n/word_iterator.h
new file mode 100644
index 0000000..c9648ca
--- /dev/null
+++ b/base/i18n/word_iterator.h
@@ -0,0 +1,87 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef BASE_I18N_WORD_ITERATOR_H_
+#define BASE_I18N_WORD_ITERATOR_H_
+
+#include <string>
+#include <vector>
+
+#include "unicode/uchar.h"
+
+#include "base/basictypes.h"
+
+// The WordIterator class iterates through the words and word breaks
+// in a string.  (In the string " foo bar! ", the word breaks are at the
+// periods in ". .foo. .bar.!. .".)
+//
+// To extract the words from a string, move a WordIterator through the
+// string and test whether IsWord() is true.  E.g.,
+//   WordIterator iter(str, WordIterator::BREAK_WORD);
+//   if (!iter.Init()) return false;
+//   while (iter.Advance()) {
+//     if (iter.IsWord()) {
+//       // region [iter.prev(),iter.pos()) contains a word.
+//       LOG(INFO) << "word: " << iter.GetWord();
+//     }
+//   }
+
+
+class WordIterator {
+ public:
+  // Which ICU break rules Init() selects: word boundaries or line-break
+  // opportunities.
+  enum BreakType {
+    BREAK_WORD,
+    BREAK_LINE
+  };
+
+  // Requires |str| to live as long as the WordIterator does (only a reference
+  // to it is stored).
+  WordIterator(const std::wstring& str, BreakType break_type);
+  ~WordIterator();
+
+  // Init() must be called before any of the iterators are valid.
+  // Returns false if ICU failed to initialize.
+  bool Init();
+
+  // Return the current break position within the string, or
+  // static_cast<size_t>(-1) once Advance() has returned false.
+  size_t pos() const { return pos_; }
+  // Return the value of pos() returned before Advance() was last called.
+  // Before the first Advance() this is static_cast<size_t>(-1).
+  size_t prev() const { return prev_; }
+
+  // Advance to the next break.  Returns false if we've run past the end of
+  // the string.  (Note that the very last "word break" is after the final
+  // character in the string, and when we advance to that position it's the
+  // last time Advance() returns true.)
+  bool Advance();
+
+  // Returns true if the break we just hit is the end of a word.
+  // (Otherwise, the break iterator just skipped over e.g. whitespace
+  // or punctuation.)
+  bool IsWord() const;
+
+  // Return the word between prev() and pos().
+  // Advance() must have been called successfully at least once
+  // for pos() to have advanced to somewhere useful.
+  std::wstring GetWord() const;
+
+ private:
+  // ICU iterator.  NULL until Init() opens it; closed by the destructor.
+  void* iter_;
+#if !defined(WCHAR_T_IS_UTF16)
+  // UTF-16 copy of |string_| that the ICU iterator runs over when wchar_t is
+  // not UTF-16.  Filled in by Init().
+  std::vector<UChar> chars_;
+#endif
+
+  // The string we're iterating over.
+  const std::wstring& string_;
+
+  // The breaking style (word/line).
+  BreakType break_type_;
+
+  // Previous and current iterator positions.
+  size_t prev_, pos_;
+
+  DISALLOW_COPY_AND_ASSIGN(WordIterator);
+};
+
+#endif  // BASE_I18N_WORD_ITERATOR_H_
diff --git a/base/i18n/word_iterator_unittest.cc b/base/i18n/word_iterator_unittest.cc
new file mode 100644
index 0000000..0d28370
--- /dev/null
+++ b/base/i18n/word_iterator_unittest.cc
@@ -0,0 +1,68 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/i18n/word_iterator.h"
+
+#include "testing/gtest/include/gtest/gtest.h"
+
+// Steps a BREAK_WORD iterator across a string that mixes words, spaces,
+// punctuation and a newline.  After every successful Advance() it checks the
+// text of the segment just passed and whether IsWord() reports it as a word.
+TEST(WordIteratorTest, BreakWord) {
+  std::wstring str(L" foo bar! \npouet boom");
+  WordIterator iter(str, WordIterator::BREAK_WORD);
+  ASSERT_TRUE(iter.Init());
+  EXPECT_TRUE(iter.Advance());
+  EXPECT_FALSE(iter.IsWord());
+  EXPECT_EQ(L" ", iter.GetWord());
+  EXPECT_TRUE(iter.Advance());
+  EXPECT_TRUE(iter.IsWord());
+  EXPECT_EQ(L"foo", iter.GetWord());
+  EXPECT_TRUE(iter.Advance());
+  EXPECT_FALSE(iter.IsWord());
+  EXPECT_EQ(L" ", iter.GetWord());
+  EXPECT_TRUE(iter.Advance());
+  EXPECT_TRUE(iter.IsWord());
+  EXPECT_EQ(L"bar", iter.GetWord());
+  EXPECT_TRUE(iter.Advance());
+  EXPECT_FALSE(iter.IsWord());
+  EXPECT_EQ(L"!", iter.GetWord());
+  EXPECT_TRUE(iter.Advance());
+  EXPECT_FALSE(iter.IsWord());
+  EXPECT_EQ(L" ", iter.GetWord());
+  EXPECT_TRUE(iter.Advance());
+  EXPECT_FALSE(iter.IsWord());
+  EXPECT_EQ(L"\n", iter.GetWord());
+  EXPECT_TRUE(iter.Advance());
+  EXPECT_TRUE(iter.IsWord());
+  EXPECT_EQ(L"pouet", iter.GetWord());
+  EXPECT_TRUE(iter.Advance());
+  EXPECT_FALSE(iter.IsWord());
+  EXPECT_EQ(L" ", iter.GetWord());
+  EXPECT_TRUE(iter.Advance());
+  EXPECT_TRUE(iter.IsWord());
+  EXPECT_EQ(L"boom", iter.GetWord());
+  // The end of the string has been reached: no further breaks.
+  EXPECT_FALSE(iter.Advance());
+  EXPECT_FALSE(iter.IsWord());
+}
+
+// Same input as BreakWord, but iterated with BREAK_LINE.  The expected
+// segments are longer: each one keeps the whitespace that follows it.
+TEST(WordIteratorTest, BreakLine) {
+  std::wstring str(L" foo bar! \npouet boom");
+  WordIterator iter(str, WordIterator::BREAK_LINE);
+  ASSERT_TRUE(iter.Init());
+  EXPECT_TRUE(iter.Advance());
+  EXPECT_FALSE(iter.IsWord());
+  EXPECT_EQ(L" ", iter.GetWord());
+  EXPECT_TRUE(iter.Advance());
+  EXPECT_FALSE(iter.IsWord());
+  EXPECT_EQ(L"foo ", iter.GetWord());
+  EXPECT_TRUE(iter.Advance());
+  // NOTE(review): this is the only segment for which IsWord() is expected to
+  // be true; presumably ICU reports a non-zero rule status for the break
+  // after "\n" -- confirm.
+  EXPECT_TRUE(iter.IsWord());
+  EXPECT_EQ(L"bar! \n", iter.GetWord());
+  EXPECT_TRUE(iter.Advance());
+  EXPECT_FALSE(iter.IsWord());
+  EXPECT_EQ(L"pouet ", iter.GetWord());
+  EXPECT_TRUE(iter.Advance());
+  EXPECT_FALSE(iter.IsWord());
+  EXPECT_EQ(L"boom", iter.GetWord());
+  // The end of the string has been reached: no further breaks.
+  EXPECT_FALSE(iter.Advance());
+  EXPECT_FALSE(iter.IsWord());
+}
author	brettw@chromium.org <brettw@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2009-10-09 18:20:30 +0000
committer	brettw@chromium.org <brettw@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2009-10-09 18:20:30 +0000
commit	fb895c694e2117c29b6afb699095f6e187a44da7 (patch)
tree	04a0d1434a470f55f0e639a3e6f15c18416d80e2 /base/i18n
parent	8ecb6aa0a92d5426c2c98c23e0e3f3c4f06972c5 (diff)
download	chromium_src-fb895c694e2117c29b6afb699095f6e187a44da7.zip chromium_src-fb895c694e2117c29b6afb699095f6e187a44da7.tar.gz chromium_src-fb895c694e2117c29b6afb699095f6e187a44da7.tar.bz2