summaryrefslogtreecommitdiffstats
path: root/base/i18n
diff options
context:
space:
mode:
authorbrettw@chromium.org <brettw@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2009-10-09 18:20:30 +0000
committerbrettw@chromium.org <brettw@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2009-10-09 18:20:30 +0000
commitfb895c694e2117c29b6afb699095f6e187a44da7 (patch)
tree04a0d1434a470f55f0e639a3e6f15c18416d80e2 /base/i18n
parent8ecb6aa0a92d5426c2c98c23e0e3f3c4f06972c5 (diff)
downloadchromium_src-fb895c694e2117c29b6afb699095f6e187a44da7.zip
chromium_src-fb895c694e2117c29b6afb699095f6e187a44da7.tar.gz
chromium_src-fb895c694e2117c29b6afb699095f6e187a44da7.tar.bz2
Move more ICU-dependent stuff from base into base/i18n. Some test stuff also
depended on this, so to make the DEPS work out, I moved the testing-related files into a new directory, base/test. TEST=none BUG=none Review URL: http://codereview.chromium.org/266038 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@28569 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'base/i18n')
-rw-r--r--base/i18n/icu_util.cc99
-rw-r--r--base/i18n/icu_util.h16
-rw-r--r--base/i18n/time_formatting.cc72
-rw-r--r--base/i18n/time_formatting.h40
-rw-r--r--base/i18n/word_iterator.cc87
-rw-r--r--base/i18n/word_iterator.h87
-rw-r--r--base/i18n/word_iterator_unittest.cc68
7 files changed, 469 insertions, 0 deletions
diff --git a/base/i18n/icu_util.cc b/base/i18n/icu_util.cc
new file mode 100644
index 0000000..6239a01
--- /dev/null
+++ b/base/i18n/icu_util.cc
@@ -0,0 +1,99 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/i18n/icu_util.h"
+
+#include "build/build_config.h"
+
+#if defined(OS_WIN)
+#include <windows.h>
+#endif
+
+#include <string>
+
+#include "base/file_path.h"
+#include "base/file_util.h"
+#include "base/logging.h"
+#include "base/path_service.h"
+#include "base/string_util.h"
+#include "base/sys_string_conversions.h"
+#include "unicode/putil.h"
+#include "unicode/udata.h"
+
+#define ICU_UTIL_DATA_FILE 0
+#define ICU_UTIL_DATA_SHARED 1
+#define ICU_UTIL_DATA_STATIC 2
+
+#ifndef ICU_UTIL_DATA_IMPL
+
+#if defined(OS_WIN)
+#define ICU_UTIL_DATA_IMPL ICU_UTIL_DATA_SHARED
+#elif defined(OS_MACOSX)
+#define ICU_UTIL_DATA_IMPL ICU_UTIL_DATA_STATIC
+#elif defined(OS_LINUX)
+#define ICU_UTIL_DATA_IMPL ICU_UTIL_DATA_FILE
+#endif
+
+#endif // ICU_UTIL_DATA_IMPL
+
+#if defined(OS_WIN)
+#define ICU_UTIL_DATA_SYMBOL "icudt" U_ICU_VERSION_SHORT "_dat"
+#define ICU_UTIL_DATA_SHARED_MODULE_NAME "icudt" U_ICU_VERSION_SHORT ".dll"
+#endif
+
+namespace icu_util {
+
+bool Initialize() {
+#ifndef NDEBUG
+ // Assert that we are not called more than once. Even though calling this
+ // function twice isn't harmful (ICU can handle it), doing so probably
+ // indicates a programming error.
+ static bool called_once = false;
+ DCHECK(!called_once);
+ called_once = true;
+#endif
+
+#if (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_SHARED)
+ // We expect to find the ICU data module alongside the current module.
+ std::wstring data_path;
+ PathService::Get(base::DIR_MODULE, &data_path);
+ file_util::AppendToPath(&data_path,
+ ASCIIToWide(ICU_UTIL_DATA_SHARED_MODULE_NAME));
+
+ HMODULE module = LoadLibrary(data_path.c_str());
+ if (!module) {
+ LOG(ERROR) << "Failed to load " << ICU_UTIL_DATA_SHARED_MODULE_NAME;
+ return false;
+ }
+
+ FARPROC addr = GetProcAddress(module, ICU_UTIL_DATA_SYMBOL);
+ if (!addr) {
+ LOG(ERROR) << ICU_UTIL_DATA_SYMBOL << ": not found in "
+ << ICU_UTIL_DATA_SHARED_MODULE_NAME;
+ return false;
+ }
+
+ UErrorCode err = U_ZERO_ERROR;
+ udata_setCommonData(reinterpret_cast<void*>(addr), &err);
+ return err == U_ZERO_ERROR;
+#elif (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_STATIC)
+ // Mac bundles the ICU data in.
+ return true;
+#elif (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_FILE)
+ // For now, expect the data file to be alongside the executable.
+ // This is sufficient while we work on unit tests, but will eventually
+ // likely live in a data directory.
+ FilePath data_path;
+ bool path_ok = PathService::Get(base::DIR_EXE, &data_path);
+ DCHECK(path_ok);
+ u_setDataDirectory(data_path.value().c_str());
+ // Only look for the packaged data file;
+ // the default behavior is to look for individual files.
+ UErrorCode err = U_ZERO_ERROR;
+ udata_setFileAccess(UDATA_ONLY_PACKAGES, &err);
+ return err == U_ZERO_ERROR;
+#endif
+}
+
+} // namespace icu_util
diff --git a/base/i18n/icu_util.h b/base/i18n/icu_util.h
new file mode 100644
index 0000000..56eaa37
--- /dev/null
+++ b/base/i18n/icu_util.h
@@ -0,0 +1,16 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef BASE_I18N_ICU_UTIL_H_
+#define BASE_I18N_ICU_UTIL_H_
+
+namespace icu_util {
+
+// Sets up ICU's data tables for the current process. Call this before ICU is
+// used. Returns false if the data could not be set up.
+bool Initialize();
+
+} // namespace icu_util
+
+#endif // BASE_I18N_ICU_UTIL_H_
diff --git a/base/i18n/time_formatting.cc b/base/i18n/time_formatting.cc
new file mode 100644
index 0000000..f031cf7
--- /dev/null
+++ b/base/i18n/time_formatting.cc
@@ -0,0 +1,72 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/i18n/time_formatting.h"
+
+#include "base/logging.h"
+#include "base/scoped_ptr.h"
+#include "base/string_util.h"
+#include "base/time.h"
+#include "unicode/datefmt.h"
+
+using base::Time;
+
+namespace {
+
+std::wstring TimeFormat(const icu::DateFormat* formatter,
+ const Time& time) {
+ DCHECK(formatter);
+ icu::UnicodeString date_string;
+
+ formatter->format(static_cast<UDate>(time.ToDoubleT() * 1000), date_string);
+ std::wstring output;
+ bool success = UTF16ToWide(date_string.getBuffer(), date_string.length(),
+ &output);
+ DCHECK(success);
+ return output;
+}
+
+}
+
+namespace base {
+
+std::wstring TimeFormatTimeOfDay(const Time& time) {
+ // We can omit the locale parameter because the default should match
+ // Chrome's application locale.
+ scoped_ptr<icu::DateFormat> formatter(
+ icu::DateFormat::createTimeInstance(icu::DateFormat::kShort));
+ return TimeFormat(formatter.get(), time);
+}
+
+std::wstring TimeFormatShortDate(const Time& time) {
+ scoped_ptr<icu::DateFormat> formatter(
+ icu::DateFormat::createDateInstance(icu::DateFormat::kMedium));
+ return TimeFormat(formatter.get(), time);
+}
+
+std::wstring TimeFormatShortDateNumeric(const Time& time) {
+ scoped_ptr<icu::DateFormat> formatter(
+ icu::DateFormat::createDateInstance(icu::DateFormat::kShort));
+ return TimeFormat(formatter.get(), time);
+}
+
+std::wstring TimeFormatShortDateAndTime(const Time& time) {
+ scoped_ptr<icu::DateFormat> formatter(
+ icu::DateFormat::createDateTimeInstance(icu::DateFormat::kShort));
+ return TimeFormat(formatter.get(), time);
+}
+
+std::wstring TimeFormatFriendlyDateAndTime(const Time& time) {
+ scoped_ptr<icu::DateFormat> formatter(
+ icu::DateFormat::createDateTimeInstance(icu::DateFormat::kFull));
+ return TimeFormat(formatter.get(), time);
+}
+
+std::wstring TimeFormatFriendlyDate(const Time& time) {
+ scoped_ptr<icu::DateFormat> formatter(icu::DateFormat::createDateInstance(
+ icu::DateFormat::kFull));
+ return TimeFormat(formatter.get(), time);
+}
+
+} // namespace base
diff --git a/base/i18n/time_formatting.h b/base/i18n/time_formatting.h
new file mode 100644
index 0000000..dd623af
--- /dev/null
+++ b/base/i18n/time_formatting.h
@@ -0,0 +1,40 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// Basic time formatting methods. These methods use the current locale
+// formatting for displaying the time.
+
+#ifndef BASE_I18N_TIME_FORMATTING_H_
+#define BASE_I18N_TIME_FORMATTING_H_
+
+#include <string>
+
+namespace base {
+
+class Time;
+
+// Returns the time of day, e.g., "3:07 PM".
+std::wstring TimeFormatTimeOfDay(const Time& time);
+
+// Returns a shortened date, e.g. "Nov 7, 2007"
+std::wstring TimeFormatShortDate(const Time& time);
+
+// Returns a numeric date such as 12/13/52.
+std::wstring TimeFormatShortDateNumeric(const Time& time);
+
+// Formats a time as a short numeric date followed by the time of day, e.g.
+// "12/13/52 2:44:30 PM".
+std::wstring TimeFormatShortDateAndTime(const Time& time);
+
+// Formats a time in a friendly sentence format, e.g.
+// "Monday, March 6, 2008 2:44:30 PM".
+std::wstring TimeFormatFriendlyDateAndTime(const Time& time);
+
+// Formats a time in a friendly sentence format, e.g.
+// "Monday, March 6, 2008".
+std::wstring TimeFormatFriendlyDate(const Time& time);
+
+} // namespace base
+
+#endif // BASE_I18N_TIME_FORMATTING_H_
diff --git a/base/i18n/word_iterator.cc b/base/i18n/word_iterator.cc
new file mode 100644
index 0000000..45a06b9
--- /dev/null
+++ b/base/i18n/word_iterator.cc
@@ -0,0 +1,87 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/i18n/word_iterator.h"
+
+#include "base/logging.h"
+#include "unicode/ubrk.h"
+#include "unicode/ustring.h"
+
+const size_t npos = -1;
+
+WordIterator::WordIterator(const std::wstring& str, BreakType break_type)
+ : iter_(NULL),
+ string_(str),
+ break_type_(break_type),
+ prev_(npos),
+ pos_(0) {
+}
+
+WordIterator::~WordIterator() {
+ if (iter_)
+ ubrk_close(iter_);
+}
+
+bool WordIterator::Init() {
+ UErrorCode status = U_ZERO_ERROR;
+ UBreakIteratorType break_type;
+ switch (break_type_) {
+ case BREAK_WORD:
+ break_type = UBRK_WORD;
+ break;
+ case BREAK_LINE:
+ break_type = UBRK_LINE;
+ break;
+ default:
+ NOTREACHED();
+ break_type = UBRK_LINE;
+ }
+#if defined(WCHAR_T_IS_UTF16)
+ iter_ = ubrk_open(break_type, NULL,
+ string_.data(), static_cast<int32_t>(string_.size()),
+ &status);
+#else // WCHAR_T_IS_UTF16
+ // When wchar_t is wider than UChar (16 bits), transform |string_| into a
+ // UChar* string. Size the UChar* buffer to hold twice as many UTF-16 code
+ // units as there are wide characters, in case each character translates
+ // to a UTF-16 surrogate pair, plus one for a NUL terminator. The factor of
+ // two is spelled sizeof(UChar) below.
+ // TODO(avi): avoid this alloc
+ chars_.resize(string_.length() * sizeof(UChar) + 1);
+
+ UErrorCode error = U_ZERO_ERROR; // NOTE(review): never inspected below.
+ int32_t destLength;
+ u_strFromWCS(&chars_[0], chars_.size(), &destLength, string_.data(),
+ string_.length(), &error);
+
+ iter_ = ubrk_open(break_type, NULL, &chars_[0], destLength, &status);
+#endif
+ if (U_FAILURE(status)) {
+ NOTREACHED() << "ubrk_open failed";
+ return false;
+ }
+ ubrk_first(iter_); // Move the iterator to the beginning of the string.
+ return true;
+}
+
+bool WordIterator::Advance() {
+ prev_ = pos_;
+ const int32_t pos = ubrk_next(iter_);
+ if (pos == UBRK_DONE) {
+ pos_ = npos;
+ return false;
+ } else {
+ pos_ = static_cast<size_t>(pos);
+ return true;
+ }
+}
+
+bool WordIterator::IsWord() const {
+ return (ubrk_getRuleStatus(iter_) != UBRK_WORD_NONE);
+}
+
+std::wstring WordIterator::GetWord() const {
+ DCHECK(prev_ != npos && pos_ != npos);
+ return string_.substr(prev_, pos_ - prev_);
+}
diff --git a/base/i18n/word_iterator.h b/base/i18n/word_iterator.h
new file mode 100644
index 0000000..c9648ca
--- /dev/null
+++ b/base/i18n/word_iterator.h
@@ -0,0 +1,87 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef BASE_I18N_WORD_ITERATOR_H_
+#define BASE_I18N_WORD_ITERATOR_H_
+
+#include <string>
+#include <vector>
+
+#include "unicode/uchar.h"
+
+#include "base/basictypes.h"
+
+// The WordIterator class iterates through the words and word breaks
+// in a string. (In the string " foo bar! ", the word breaks are at the
+// periods in ". .foo. .bar.!. .".)
+//
+// To extract the words from a string, move a WordIterator through the
+// string and test whether IsWord() is true. E.g.,
+// WordIterator iter(str, WordIterator::BREAK_WORD);
+// if (!iter.Init()) return false;
+// while (iter.Advance()) {
+// if (iter.IsWord()) {
+// // region [iter.prev(),iter.pos()) contains a word.
+// LOG(INFO) << "word: " << iter.GetWord();
+// }
+// }
+
+
+class WordIterator {
+ public:
+ enum BreakType {
+ BREAK_WORD,
+ BREAK_LINE
+ };
+
+ // Requires |str| to live as long as the WordIterator does.
+ WordIterator(const std::wstring& str, BreakType break_type);
+ ~WordIterator();
+
+ // Init() must be called before any of the iterators are valid.
+ // Returns false if ICU failed to initialize.
+ bool Init();
+
+ // Return the current break position within the string, or
+ // static_cast<size_t>(-1) once Advance() has returned false.
+ size_t pos() const { return pos_; }
+ // Return the value of pos() returned before Advance() was last called.
+ size_t prev() const { return prev_; }
+
+ // Advance to the next break. Returns false if we've run past the end of
+ // the string. (Note that the very last "word break" is after the final
+ // character in the string, and when we advance to that position it's the
+ // last time Advance() returns true.)
+ bool Advance();
+
+ // Returns true if the break we just hit is the end of a word.
+ // (Otherwise, the break iterator just skipped over e.g. whitespace
+ // or punctuation.)
+ bool IsWord() const;
+
+ // Return the word between prev() and pos().
+ // Advance() must have been called successfully at least once
+ // for pos() to have advanced to somewhere useful.
+ std::wstring GetWord() const;
+
+ private:
+ // ICU iterator.
+ void* iter_;
+#if !defined(WCHAR_T_IS_UTF16)
+ std::vector<UChar> chars_;
+#endif
+
+ // The string we're iterating over.
+ const std::wstring& string_;
+
+ // The breaking style (word/line).
+ BreakType break_type_;
+
+ // Previous and current iterator positions.
+ size_t prev_, pos_;
+
+ DISALLOW_COPY_AND_ASSIGN(WordIterator);
+};
+
+#endif // BASE_I18N_WORD_ITERATOR_H_
diff --git a/base/i18n/word_iterator_unittest.cc b/base/i18n/word_iterator_unittest.cc
new file mode 100644
index 0000000..0d28370
--- /dev/null
+++ b/base/i18n/word_iterator_unittest.cc
@@ -0,0 +1,68 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/i18n/word_iterator.h"
+
+#include "testing/gtest/include/gtest/gtest.h"
+
+TEST(WordIteratorTest, BreakWord) {
+ std::wstring str(L" foo bar! \npouet boom");
+ WordIterator iter(str, WordIterator::BREAK_WORD);
+ ASSERT_TRUE(iter.Init());
+ EXPECT_TRUE(iter.Advance());
+ EXPECT_FALSE(iter.IsWord());
+ EXPECT_EQ(L" ", iter.GetWord());
+ EXPECT_TRUE(iter.Advance());
+ EXPECT_TRUE(iter.IsWord());
+ EXPECT_EQ(L"foo", iter.GetWord());
+ EXPECT_TRUE(iter.Advance());
+ EXPECT_FALSE(iter.IsWord());
+ EXPECT_EQ(L" ", iter.GetWord());
+ EXPECT_TRUE(iter.Advance());
+ EXPECT_TRUE(iter.IsWord());
+ EXPECT_EQ(L"bar", iter.GetWord());
+ EXPECT_TRUE(iter.Advance());
+ EXPECT_FALSE(iter.IsWord());
+ EXPECT_EQ(L"!", iter.GetWord());
+ EXPECT_TRUE(iter.Advance());
+ EXPECT_FALSE(iter.IsWord());
+ EXPECT_EQ(L" ", iter.GetWord());
+ EXPECT_TRUE(iter.Advance());
+ EXPECT_FALSE(iter.IsWord());
+ EXPECT_EQ(L"\n", iter.GetWord());
+ EXPECT_TRUE(iter.Advance());
+ EXPECT_TRUE(iter.IsWord());
+ EXPECT_EQ(L"pouet", iter.GetWord());
+ EXPECT_TRUE(iter.Advance());
+ EXPECT_FALSE(iter.IsWord());
+ EXPECT_EQ(L" ", iter.GetWord());
+ EXPECT_TRUE(iter.Advance());
+ EXPECT_TRUE(iter.IsWord());
+ EXPECT_EQ(L"boom", iter.GetWord());
+ EXPECT_FALSE(iter.Advance());
+ EXPECT_FALSE(iter.IsWord());
+}
+
+TEST(WordIteratorTest, BreakLine) {
+ std::wstring str(L" foo bar! \npouet boom");
+ WordIterator iter(str, WordIterator::BREAK_LINE);
+ ASSERT_TRUE(iter.Init());
+ EXPECT_TRUE(iter.Advance());
+ EXPECT_FALSE(iter.IsWord());
+ EXPECT_EQ(L" ", iter.GetWord());
+ EXPECT_TRUE(iter.Advance());
+ EXPECT_FALSE(iter.IsWord());
+ EXPECT_EQ(L"foo ", iter.GetWord());
+ EXPECT_TRUE(iter.Advance());
+ EXPECT_TRUE(iter.IsWord());
+ EXPECT_EQ(L"bar! \n", iter.GetWord());
+ EXPECT_TRUE(iter.Advance());
+ EXPECT_FALSE(iter.IsWord());
+ EXPECT_EQ(L"pouet ", iter.GetWord());
+ EXPECT_TRUE(iter.Advance());
+ EXPECT_FALSE(iter.IsWord());
+ EXPECT_EQ(L"boom", iter.GetWord());
+ EXPECT_FALSE(iter.Advance());
+ EXPECT_FALSE(iter.IsWord());
+}