diff options
author | brettw@chromium.org <brettw@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-10-09 18:20:30 +0000 |
---|---|---|
committer | brettw@chromium.org <brettw@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-10-09 18:20:30 +0000 |
commit | fb895c694e2117c29b6afb699095f6e187a44da7 (patch) | |
tree | 04a0d1434a470f55f0e639a3e6f15c18416d80e2 /base/i18n | |
parent | 8ecb6aa0a92d5426c2c98c23e0e3f3c4f06972c5 (diff) | |
download | chromium_src-fb895c694e2117c29b6afb699095f6e187a44da7.zip chromium_src-fb895c694e2117c29b6afb699095f6e187a44da7.tar.gz chromium_src-fb895c694e2117c29b6afb699095f6e187a44da7.tar.bz2 |
Move more ICU-dependent code from base into base/i18n. Some test code also
depended on it, so to make the DEPS work out, I created a new base/test
directory and moved the testing-related files there.
TEST=none
BUG=none
Review URL: http://codereview.chromium.org/266038
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@28569 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'base/i18n')
-rw-r--r-- | base/i18n/icu_util.cc | 99 | ||||
-rw-r--r-- | base/i18n/icu_util.h | 16 | ||||
-rw-r--r-- | base/i18n/time_formatting.cc | 72 | ||||
-rw-r--r-- | base/i18n/time_formatting.h | 40 | ||||
-rw-r--r-- | base/i18n/word_iterator.cc | 87 | ||||
-rw-r--r-- | base/i18n/word_iterator.h | 87 | ||||
-rw-r--r-- | base/i18n/word_iterator_unittest.cc | 68 |
7 files changed, 469 insertions, 0 deletions
diff --git a/base/i18n/icu_util.cc b/base/i18n/icu_util.cc new file mode 100644 index 0000000..6239a01 --- /dev/null +++ b/base/i18n/icu_util.cc @@ -0,0 +1,99 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/i18n/icu_util.h" + +#include "build/build_config.h" + +#if defined(OS_WIN) +#include <windows.h> +#endif + +#include <string> + +#include "base/file_path.h" +#include "base/file_util.h" +#include "base/logging.h" +#include "base/path_service.h" +#include "base/string_util.h" +#include "base/sys_string_conversions.h" +#include "unicode/putil.h" +#include "unicode/udata.h" + +#define ICU_UTIL_DATA_FILE 0 +#define ICU_UTIL_DATA_SHARED 1 +#define ICU_UTIL_DATA_STATIC 2 + +#ifndef ICU_UTIL_DATA_IMPL + +#if defined(OS_WIN) +#define ICU_UTIL_DATA_IMPL ICU_UTIL_DATA_SHARED +#elif defined(OS_MACOSX) +#define ICU_UTIL_DATA_IMPL ICU_UTIL_DATA_STATIC +#elif defined(OS_LINUX) +#define ICU_UTIL_DATA_IMPL ICU_UTIL_DATA_FILE +#endif + +#endif // ICU_UTIL_DATA_IMPL + +#if defined(OS_WIN) +#define ICU_UTIL_DATA_SYMBOL "icudt" U_ICU_VERSION_SHORT "_dat" +#define ICU_UTIL_DATA_SHARED_MODULE_NAME "icudt" U_ICU_VERSION_SHORT ".dll" +#endif + +namespace icu_util { + +bool Initialize() { +#ifndef NDEBUG + // Assert that we are not called more than once. Even though calling this + // function isn't harmful (ICU can handle it), being called twice probably + // indicates a programming error. + static bool called_once = false; + DCHECK(!called_once); + called_once = true; +#endif + +#if (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_SHARED) + // We expect to find the ICU data module alongside the current module. 
+ std::wstring data_path; + PathService::Get(base::DIR_MODULE, &data_path); + file_util::AppendToPath(&data_path, + ASCIIToWide(ICU_UTIL_DATA_SHARED_MODULE_NAME)); + + HMODULE module = LoadLibrary(data_path.c_str()); + if (!module) { + LOG(ERROR) << "Failed to load " << ICU_UTIL_DATA_SHARED_MODULE_NAME; + return false; + } + + FARPROC addr = GetProcAddress(module, ICU_UTIL_DATA_SYMBOL); + if (!addr) { + LOG(ERROR) << ICU_UTIL_DATA_SYMBOL << ": not found in " + << ICU_UTIL_DATA_SHARED_MODULE_NAME; + return false; + } + + UErrorCode err = U_ZERO_ERROR; + udata_setCommonData(reinterpret_cast<void*>(addr), &err); + return err == U_ZERO_ERROR; +#elif (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_STATIC) + // Mac bundles the ICU data in. + return true; +#elif (ICU_UTIL_DATA_IMPL == ICU_UTIL_DATA_FILE) + // For now, expect the data file to be alongside the executable. + // This is sufficient while we work on unit tests, but will eventually + // likely live in a data directory. + FilePath data_path; + bool path_ok = PathService::Get(base::DIR_EXE, &data_path); + DCHECK(path_ok); + u_setDataDirectory(data_path.value().c_str()); + // Only look for the packaged data file; + // the default behavior is to look for individual files. + UErrorCode err = U_ZERO_ERROR; + udata_setFileAccess(UDATA_ONLY_PACKAGES, &err); + return err == U_ZERO_ERROR; +#endif +} + +} // namespace icu_util diff --git a/base/i18n/icu_util.h b/base/i18n/icu_util.h new file mode 100644 index 0000000..56eaa37 --- /dev/null +++ b/base/i18n/icu_util.h @@ -0,0 +1,16 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_I18N_ICU_UTIL_H_ +#define BASE_I18N_ICU_UTIL_H_ + +namespace icu_util { + +// Call this function to load ICU's data tables for the current process. This +// function should be called before ICU is used. 
+bool Initialize(); + +} // namespace icu_util + +#endif // BASE_I18N_ICU_UTIL_H_ diff --git a/base/i18n/time_formatting.cc b/base/i18n/time_formatting.cc new file mode 100644 index 0000000..f031cf7 --- /dev/null +++ b/base/i18n/time_formatting.cc @@ -0,0 +1,72 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/i18n/time_formatting.h" + +#include "base/logging.h" +#include "base/scoped_ptr.h" +#include "base/string_util.h" +#include "base/time.h" +#include "unicode/datefmt.h" + +using base::Time; + +namespace { + +std::wstring TimeFormat(const icu::DateFormat* formatter, + const Time& time) { + DCHECK(formatter); + icu::UnicodeString date_string; + + formatter->format(static_cast<UDate>(time.ToDoubleT() * 1000), date_string); + std::wstring output; + bool success = UTF16ToWide(date_string.getBuffer(), date_string.length(), + &output); + DCHECK(success); + return output; +} + +} + +namespace base { + +std::wstring TimeFormatTimeOfDay(const Time& time) { + // We can omit the locale parameter because the default should match + // Chrome's application locale. 
+ scoped_ptr<icu::DateFormat> formatter( + icu::DateFormat::createTimeInstance(icu::DateFormat::kShort)); + return TimeFormat(formatter.get(), time); +} + +std::wstring TimeFormatShortDate(const Time& time) { + scoped_ptr<icu::DateFormat> formatter( + icu::DateFormat::createDateInstance(icu::DateFormat::kMedium)); + return TimeFormat(formatter.get(), time); +} + +std::wstring TimeFormatShortDateNumeric(const Time& time) { + scoped_ptr<icu::DateFormat> formatter( + icu::DateFormat::createDateInstance(icu::DateFormat::kShort)); + return TimeFormat(formatter.get(), time); +} + +std::wstring TimeFormatShortDateAndTime(const Time& time) { + scoped_ptr<icu::DateFormat> formatter( + icu::DateFormat::createDateTimeInstance(icu::DateFormat::kShort)); + return TimeFormat(formatter.get(), time); +} + +std::wstring TimeFormatFriendlyDateAndTime(const Time& time) { + scoped_ptr<icu::DateFormat> formatter( + icu::DateFormat::createDateTimeInstance(icu::DateFormat::kFull)); + return TimeFormat(formatter.get(), time); +} + +std::wstring TimeFormatFriendlyDate(const Time& time) { + scoped_ptr<icu::DateFormat> formatter(icu::DateFormat::createDateInstance( + icu::DateFormat::kFull)); + return TimeFormat(formatter.get(), time); +} + +} // namespace base diff --git a/base/i18n/time_formatting.h b/base/i18n/time_formatting.h new file mode 100644 index 0000000..dd623af --- /dev/null +++ b/base/i18n/time_formatting.h @@ -0,0 +1,40 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// Basic time formatting methods. These methods use the current locale +// formatting for displaying the time. + +#ifndef BASE_I18N_TIME_FORMATTING_H_ +#define BASE_I18N_TIME_FORMATTING_H_ + +#include <string> + +namespace base { + +class Time; + +// Returns the time of day, e.g., "3:07 PM". +std::wstring TimeFormatTimeOfDay(const Time& time); + +// Returns a shortened date, e.g. 
"Nov 7, 2007" +std::wstring TimeFormatShortDate(const Time& time); + +// Returns a numeric date such as 12/13/52. +std::wstring TimeFormatShortDateNumeric(const Time& time); + +// Formats a time in a friendly sentence format, e.g. +// "Monday, March 6, 2008 2:44:30 PM". +std::wstring TimeFormatShortDateAndTime(const Time& time); + +// Formats a time in a friendly sentence format, e.g. +// "Monday, March 6, 2008 2:44:30 PM". +std::wstring TimeFormatFriendlyDateAndTime(const Time& time); + +// Formats a time in a friendly sentence format, e.g. +// "Monday, March 6, 2008". +std::wstring TimeFormatFriendlyDate(const Time& time); + +} // namespace base + +#endif // BASE_I18N_TIME_FORMATTING_H_ diff --git a/base/i18n/word_iterator.cc b/base/i18n/word_iterator.cc new file mode 100644 index 0000000..45a06b9 --- /dev/null +++ b/base/i18n/word_iterator.cc @@ -0,0 +1,87 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/i18n/word_iterator.h" + +#include "base/logging.h" +#include "unicode/ubrk.h" +#include "unicode/ustring.h" + +const size_t npos = -1; + +WordIterator::WordIterator(const std::wstring& str, BreakType break_type) + : iter_(NULL), + string_(str), + break_type_(break_type), + prev_(npos), + pos_(0) { +} + +WordIterator::~WordIterator() { + if (iter_) + ubrk_close(iter_); +} + +bool WordIterator::Init() { + UErrorCode status = U_ZERO_ERROR; + UBreakIteratorType break_type; + switch (break_type_) { + case BREAK_WORD: + break_type = UBRK_WORD; + break; + case BREAK_LINE: + break_type = UBRK_LINE; + break; + default: + NOTREACHED(); + break_type = UBRK_LINE; + } +#if defined(WCHAR_T_IS_UTF16) + iter_ = ubrk_open(break_type, NULL, + string_.data(), static_cast<int32_t>(string_.size()), + &status); +#else // WCHAR_T_IS_UTF16 + // When wchar_t is wider than UChar (16 bits), transform |string_| into a + // UChar* string. 
Size the UChar* buffer to be large enough to hold twice + // as many UTF-16 code points as there are UCS-4 characters, in case each + // character translates to a UTF-16 surrogate pair, and leave room for a NUL + // terminator. + // TODO(avi): avoid this alloc + chars_.resize(string_.length() * sizeof(UChar) + 1); + + UErrorCode error = U_ZERO_ERROR; + int32_t destLength; + u_strFromWCS(&chars_[0], chars_.size(), &destLength, string_.data(), + string_.length(), &error); + + iter_ = ubrk_open(break_type, NULL, &chars_[0], destLength, &status); +#endif + if (U_FAILURE(status)) { + NOTREACHED() << "ubrk_open failed"; + return false; + } + ubrk_first(iter_); // Move the iterator to the beginning of the string. + return true; +} + +bool WordIterator::Advance() { + prev_ = pos_; + const int32_t pos = ubrk_next(iter_); + if (pos == UBRK_DONE) { + pos_ = npos; + return false; + } else { + pos_ = static_cast<size_t>(pos); + return true; + } +} + +bool WordIterator::IsWord() const { + return (ubrk_getRuleStatus(iter_) != UBRK_WORD_NONE); +} + +std::wstring WordIterator::GetWord() const { + DCHECK(prev_ != npos && pos_ != npos); + return string_.substr(prev_, pos_ - prev_); +} diff --git a/base/i18n/word_iterator.h b/base/i18n/word_iterator.h new file mode 100644 index 0000000..c9648ca --- /dev/null +++ b/base/i18n/word_iterator.h @@ -0,0 +1,87 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_I18N_WORD_ITERATOR_H_ +#define BASE_I18N_WORD_ITERATOR_H_ + +#include <string> +#include <vector> + +#include "unicode/uchar.h" + +#include "base/basictypes.h" + +// The WordIterator class iterates through the words and word breaks +// in a string. (In the string " foo bar! ", the word breaks are at the +// periods in ". .foo. .bar.!. .".) 
+// +// To extract the words from a string, move a WordIterator through the +// string and test whether IsWord() is true. E.g., +// WordIterator iter(str, WordIterator::BREAK_WORD); +// if (!iter.Init()) return false; +// while (iter.Advance()) { +// if (iter.IsWord()) { +// // region [iter.prev(),iter.pos()) contains a word. +// LOG(INFO) << "word: " << iter.GetWord(); +// } +// } + + +class WordIterator { + public: + enum BreakType { + BREAK_WORD, + BREAK_LINE + }; + + // Requires |str| to live as long as the WordIterator does. + WordIterator(const std::wstring& str, BreakType break_type); + ~WordIterator(); + + // Init() must be called before any of the iterators are valid. + // Returns false if ICU failed to initialize. + bool Init(); + + // Return the current break position within the string, + // or WordIterator::npos when done. + size_t pos() const { return pos_; } + // Return the value of pos() returned before Advance() was last called. + size_t prev() const { return prev_; } + + // Advance to the next break. Returns false if we've run past the end of + // the string. (Note that the very last "word break" is after the final + // character in the string, and when we advance to that position it's the + // last time Advance() returns true.) + bool Advance(); + + // Returns true if the break we just hit is the end of a word. + // (Otherwise, the break iterator just skipped over e.g. whitespace + // or punctuation.) + bool IsWord() const; + + // Return the word between prev() and pos(). + // Advance() must have been called successfully at least once + // for pos() to have advanced to somewhere useful. + std::wstring GetWord() const; + + private: + // ICU iterator. + void* iter_; +#if !defined(WCHAR_T_IS_UTF16) + std::vector<UChar> chars_; +#endif + + // The string we're iterating over. + const std::wstring& string_; + + // The breaking style (word/line). + BreakType break_type_; + + // Previous and current iterator positions. 
+ size_t prev_, pos_; + + DISALLOW_COPY_AND_ASSIGN(WordIterator); +}; + +#endif // BASE_I18N_WORD_ITERATOR_H__ diff --git a/base/i18n/word_iterator_unittest.cc b/base/i18n/word_iterator_unittest.cc new file mode 100644 index 0000000..0d28370 --- /dev/null +++ b/base/i18n/word_iterator_unittest.cc @@ -0,0 +1,68 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/i18n/word_iterator.h" + +#include "testing/gtest/include/gtest/gtest.h" + +TEST(WordIteratorTest, BreakWord) { + std::wstring str(L" foo bar! \npouet boom"); + WordIterator iter(str, WordIterator::BREAK_WORD); + ASSERT_TRUE(iter.Init()); + EXPECT_TRUE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_EQ(L" ", iter.GetWord()); + EXPECT_TRUE(iter.Advance()); + EXPECT_TRUE(iter.IsWord()); + EXPECT_EQ(L"foo", iter.GetWord()); + EXPECT_TRUE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_EQ(L" ", iter.GetWord()); + EXPECT_TRUE(iter.Advance()); + EXPECT_TRUE(iter.IsWord()); + EXPECT_EQ(L"bar", iter.GetWord()); + EXPECT_TRUE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_EQ(L"!", iter.GetWord()); + EXPECT_TRUE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_EQ(L" ", iter.GetWord()); + EXPECT_TRUE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_EQ(L"\n", iter.GetWord()); + EXPECT_TRUE(iter.Advance()); + EXPECT_TRUE(iter.IsWord()); + EXPECT_EQ(L"pouet", iter.GetWord()); + EXPECT_TRUE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_EQ(L" ", iter.GetWord()); + EXPECT_TRUE(iter.Advance()); + EXPECT_TRUE(iter.IsWord()); + EXPECT_EQ(L"boom", iter.GetWord()); + EXPECT_FALSE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); +} + +TEST(WordIteratorTest, BreakLine) { + std::wstring str(L" foo bar! 
\npouet boom"); + WordIterator iter(str, WordIterator::BREAK_LINE); + ASSERT_TRUE(iter.Init()); + EXPECT_TRUE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_EQ(L" ", iter.GetWord()); + EXPECT_TRUE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_EQ(L"foo ", iter.GetWord()); + EXPECT_TRUE(iter.Advance()); + EXPECT_TRUE(iter.IsWord()); + EXPECT_EQ(L"bar! \n", iter.GetWord()); + EXPECT_TRUE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_EQ(L"pouet ", iter.GetWord()); + EXPECT_TRUE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); + EXPECT_EQ(L"boom", iter.GetWord()); + EXPECT_FALSE(iter.Advance()); + EXPECT_FALSE(iter.IsWord()); +} |