diff options
author | jshin <jshin@chromium.org> | 2015-08-07 03:11:09 -0700 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2015-08-07 10:11:42 +0000 |
commit | 8b581d8b638951f98c0fb0c0116ac18b355b825e (patch) | |
tree | 58623cbfa64540aca8cb288b29bf8a0efbb2327b /base | |
parent | 3bc71b8291a88cc35c21d05d9fb99787cccfddf5 (diff) | |
download | chromium_src-8b581d8b638951f98c0fb0c0116ac18b355b825e.zip chromium_src-8b581d8b638951f98c0fb0c0116ac18b355b825e.tar.gz chromium_src-8b581d8b638951f98c0fb0c0116ac18b355b825e.tar.bz2 |
Add ICU message format support
Adopt and customize an ICU message format wrapper used at Google to meet
Chromium's needs.
This will enable formatting of 'complex messages' requiring plural and/or
selector (e.g. gender or 'single vs multiple') support with more than one
parameter.
Besides, l10n_util::GetPluralStringF* is rewritten to use this API.
I'm also planning to use this API to add similar support to Chromium's
JavaScript-based UI and extensions.
References:
MessageFormat specs:
http://icu-project.org/apiref/icu4j/com/ibm/icu/text/MessageFormat.html
http://icu-project.org/apiref/icu4c/classicu_1_1DecimalFormat.html#details
Examples:
http://userguide.icu-project.org/formatparse/messages
message_formatter_unittest.cc
go/plurals inside Google.
BUG=481734
TEST=base_unittests --gtest_filter="MessageFormat*"
Review URL: https://codereview.chromium.org/1140153005
Cr-Commit-Position: refs/heads/master@{#342327}
Diffstat (limited to 'base')
-rw-r--r-- | base/BUILD.gn | 7 | ||||
-rw-r--r-- | base/base.gyp | 3 | ||||
-rw-r--r-- | base/base.gypi | 2 | ||||
-rw-r--r-- | base/i18n/message_formatter.cc | 141 | ||||
-rw-r--r-- | base/i18n/message_formatter.h | 111 | ||||
-rw-r--r-- | base/i18n/message_formatter_unittest.cc | 180 |
6 files changed, 443 insertions, 1 deletions
diff --git a/base/BUILD.gn b/base/BUILD.gn index b8ae3a9..6f364a9 100644 --- a/base/BUILD.gn +++ b/base/BUILD.gn @@ -987,6 +987,8 @@ component("i18n") { "i18n/icu_string_conversions.h", "i18n/icu_util.cc", "i18n/icu_util.h", + "i18n/message_formatter.cc", + "i18n/message_formatter.h", "i18n/number_formatting.cc", "i18n/number_formatting.h", "i18n/rtl.cc", @@ -1006,10 +1008,12 @@ component("i18n") { ] defines = [ "BASE_I18N_IMPLEMENTATION" ] configs += [ "//build/config/compiler:wexit_time_destructors" ] + public_deps = [ + "//third_party/icu", + ] deps = [ ":base", "//base/third_party/dynamic_annotations", - "//third_party/icu", ] if (!is_debug) { @@ -1261,6 +1265,7 @@ test("base_unittests") { "i18n/char_iterator_unittest.cc", "i18n/file_util_icu_unittest.cc", "i18n/icu_string_conversions_unittest.cc", + "i18n/message_formatter_unittest.cc", "i18n/number_formatting_unittest.cc", "i18n/rtl_unittest.cc", "i18n/streaming_utf8_validator_unittest.cc", diff --git a/base/base.gyp b/base/base.gyp index e9b0995..4a558d8 100644 --- a/base/base.gyp +++ b/base/base.gyp @@ -288,6 +288,8 @@ ], 'export_dependent_settings': [ 'base', + '../third_party/icu/icu.gyp:icuuc', + '../third_party/icu/icu.gyp:icui18n', ], 'includes': [ '../build/android/increase_size_for_speed.gypi', @@ -483,6 +485,7 @@ 'i18n/char_iterator_unittest.cc', 'i18n/file_util_icu_unittest.cc', 'i18n/icu_string_conversions_unittest.cc', + 'i18n/message_formatter_unittest.cc', 'i18n/number_formatting_unittest.cc', 'i18n/rtl_unittest.cc', 'i18n/streaming_utf8_validator_unittest.cc', diff --git a/base/base.gypi b/base/base.gypi index c3725a0..cce8e44 100644 --- a/base/base.gypi +++ b/base/base.gypi @@ -1018,6 +1018,8 @@ 'i18n/icu_string_conversions.h', 'i18n/icu_util.cc', 'i18n/icu_util.h', + 'i18n/message_formatter.cc', + 'i18n/message_formatter.h', 'i18n/number_formatting.cc', 'i18n/number_formatting.h', 'i18n/rtl.cc', diff --git a/base/i18n/message_formatter.cc b/base/i18n/message_formatter.cc new file mode 100644 
index 0000000..702e51b --- /dev/null +++ b/base/i18n/message_formatter.cc @@ -0,0 +1,141 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#include "base/i18n/message_formatter.h" + +#include "base/logging.h" +#include "base/numerics/safe_conversions.h" +#include "base/time/time.h" +#include "third_party/icu/source/common/unicode/unistr.h" +#include "third_party/icu/source/common/unicode/utypes.h" +#include "third_party/icu/source/i18n/unicode/fmtable.h" +#include "third_party/icu/source/i18n/unicode/msgfmt.h" + +using icu::UnicodeString; + +namespace base { +namespace i18n { +namespace { +UnicodeString UnicodeStringFromStringPiece(StringPiece str) { + return UnicodeString::fromUTF8( + icu::StringPiece(str.data(), base::checked_cast<int32_t>(str.size()))); +} +} // anonymous namespace + +namespace internal { +MessageArg::MessageArg() : formattable(nullptr) {} + +MessageArg::MessageArg(const char* s) + : formattable(new icu::Formattable(UnicodeStringFromStringPiece(s))) {} + +MessageArg::MessageArg(StringPiece s) + : formattable(new icu::Formattable(UnicodeStringFromStringPiece(s))) {} + +MessageArg::MessageArg(const std::string& s) + : formattable(new icu::Formattable(UnicodeString::fromUTF8(s))) {} + +MessageArg::MessageArg(const string16& s) + : formattable(new icu::Formattable(UnicodeString(s.data(), s.size()))) {} + +MessageArg::MessageArg(int i) : formattable(new icu::Formattable(i)) {} + +MessageArg::MessageArg(int64_t i) : formattable(new icu::Formattable(i)) {} + +MessageArg::MessageArg(double d) : formattable(new icu::Formattable(d)) {} + +MessageArg::MessageArg(const Time& t) + : formattable(new icu::Formattable(static_cast<UDate>(t.ToJsTime()))) {} + +MessageArg::~MessageArg() {} + +// Tests if this argument has a value, and if so increments *count. 
+bool MessageArg::has_value(int *count) const { + if (formattable == nullptr) + return false; + + ++*count; + return true; +} + +} // namespace internal + +string16 MessageFormatter::FormatWithNumberedArgs( + StringPiece16 msg, + const internal::MessageArg& arg0, + const internal::MessageArg& arg1, + const internal::MessageArg& arg2, + const internal::MessageArg& arg3, + const internal::MessageArg& arg4, + const internal::MessageArg& arg5, + const internal::MessageArg& arg6) { + int32_t args_count = 0; + icu::Formattable args[] = { + arg0.has_value(&args_count) ? *arg0.formattable : icu::Formattable(), + arg1.has_value(&args_count) ? *arg1.formattable : icu::Formattable(), + arg2.has_value(&args_count) ? *arg2.formattable : icu::Formattable(), + arg3.has_value(&args_count) ? *arg3.formattable : icu::Formattable(), + arg4.has_value(&args_count) ? *arg4.formattable : icu::Formattable(), + arg5.has_value(&args_count) ? *arg5.formattable : icu::Formattable(), + arg6.has_value(&args_count) ? 
*arg6.formattable : icu::Formattable(), + }; + + UnicodeString msg_string(msg.data(), msg.size()); + UErrorCode error = U_ZERO_ERROR; + icu::MessageFormat format(msg_string, error); + icu::UnicodeString formatted; + icu::FieldPosition ignore(icu::FieldPosition::DONT_CARE); + format.format(args, args_count, formatted, ignore, error); + if (U_FAILURE(error)) { + LOG(ERROR) << "MessageFormat(" << msg.as_string() << ") failed with " + << u_errorName(error); + return string16(); + } + return string16(formatted.getBuffer(), formatted.length()); +} + +string16 MessageFormatter::FormatWithNamedArgs( + StringPiece16 msg, + StringPiece name0, const internal::MessageArg& arg0, + StringPiece name1, const internal::MessageArg& arg1, + StringPiece name2, const internal::MessageArg& arg2, + StringPiece name3, const internal::MessageArg& arg3, + StringPiece name4, const internal::MessageArg& arg4, + StringPiece name5, const internal::MessageArg& arg5, + StringPiece name6, const internal::MessageArg& arg6) { + icu::UnicodeString names[] = { + UnicodeStringFromStringPiece(name0), + UnicodeStringFromStringPiece(name1), + UnicodeStringFromStringPiece(name2), + UnicodeStringFromStringPiece(name3), + UnicodeStringFromStringPiece(name4), + UnicodeStringFromStringPiece(name5), + UnicodeStringFromStringPiece(name6), + }; + int32_t args_count = 0; + icu::Formattable args[] = { + arg0.has_value(&args_count) ? *arg0.formattable : icu::Formattable(), + arg1.has_value(&args_count) ? *arg1.formattable : icu::Formattable(), + arg2.has_value(&args_count) ? *arg2.formattable : icu::Formattable(), + arg3.has_value(&args_count) ? *arg3.formattable : icu::Formattable(), + arg4.has_value(&args_count) ? *arg4.formattable : icu::Formattable(), + arg5.has_value(&args_count) ? *arg5.formattable : icu::Formattable(), + arg6.has_value(&args_count) ? 
*arg6.formattable : icu::Formattable(), + }; + + UnicodeString msg_string(msg.data(), msg.size()); + UErrorCode error = U_ZERO_ERROR; + icu::MessageFormat format(msg_string, error); + + icu::UnicodeString formatted; + format.format(names, args, args_count, formatted, error); + if (U_FAILURE(error)) { + LOG(ERROR) << "MessageFormat(" << msg.as_string() << ") failed with " + << u_errorName(error); + return string16(); + } + return string16(formatted.getBuffer(), formatted.length()); +} + +} // namespace i18n +} // namespace base diff --git a/base/i18n/message_formatter.h b/base/i18n/message_formatter.h new file mode 100644 index 0000000..bcdc3bc --- /dev/null +++ b/base/i18n/message_formatter.h @@ -0,0 +1,111 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +#ifndef BASE_I18N_MESSAGE_FORMATTER_H_ +#define BASE_I18N_MESSAGE_FORMATTER_H_ + +#include <stdint.h> +#include <string> + +#include "base/i18n/base_i18n_export.h" +#include "base/memory/scoped_ptr.h" +#include "base/strings/string16.h" +#include "base/strings/string_piece.h" +#include "third_party/icu/source/common/unicode/uversion.h" + +U_NAMESPACE_BEGIN +class Formattable; +U_NAMESPACE_END + +namespace base { + +class Time; + +namespace i18n { + +class MessageFormatter; + +namespace internal { + +class BASE_I18N_EXPORT MessageArg { + public: + MessageArg(const char* s); + MessageArg(StringPiece s); + MessageArg(const std::string& s); + MessageArg(const string16& s); + MessageArg(int i); + MessageArg(int64_t i); + MessageArg(double d); + MessageArg(const Time& t); + ~MessageArg(); + + private: + friend class base::i18n::MessageFormatter; + MessageArg(); + // Tests if this argument has a value, and if so increments *count. 
+ bool has_value(int* count) const; + scoped_ptr<icu::Formattable> formattable; + DISALLOW_COPY_AND_ASSIGN(MessageArg); +}; + +} // namespace internal + +// Message Formatter with the ICU message format syntax support. +// It can format strings (UTF-8 and UTF-16), numbers and base::Time with +// plural, gender and other 'selectors' support. This is handy if you +// have multiple parameters of differnt types and some of them require +// plural or gender/selector support. +// +// To use this API for locale-sensitive formatting, retrieve a 'message +// template' in the ICU message format from a message bundle (e.g. with +// l10n_util::GetStringUTF16()) and pass it to FormatWith{Named,Numbered}Args. +// +// MessageFormat specs: +// http://icu-project.org/apiref/icu4j/com/ibm/icu/text/MessageFormat.html +// http://icu-project.org/apiref/icu4c/classicu_1_1DecimalFormat.html#details +// Examples: +// http://userguide.icu-project.org/formatparse/messages +// message_formatter_unittest.cc +// go/plurals inside Google. +// TODO(jshin): Document this API at sites.chromium.org and add a reference +// here. 
+ +class BASE_I18N_EXPORT MessageFormatter { + public: + static string16 FormatWithNamedArgs( + StringPiece16 msg, + StringPiece name0 = StringPiece(), + const internal::MessageArg& arg0 = internal::MessageArg(), + StringPiece name1 = StringPiece(), + const internal::MessageArg& arg1 = internal::MessageArg(), + StringPiece name2 = StringPiece(), + const internal::MessageArg& arg2 = internal::MessageArg(), + StringPiece name3 = StringPiece(), + const internal::MessageArg& arg3 = internal::MessageArg(), + StringPiece name4 = StringPiece(), + const internal::MessageArg& arg4 = internal::MessageArg(), + StringPiece name5 = StringPiece(), + const internal::MessageArg& arg5 = internal::MessageArg(), + StringPiece name6 = StringPiece(), + const internal::MessageArg& arg6 = internal::MessageArg()); + + static string16 FormatWithNumberedArgs( + StringPiece16 msg, + const internal::MessageArg& arg0 = internal::MessageArg(), + const internal::MessageArg& arg1 = internal::MessageArg(), + const internal::MessageArg& arg2 = internal::MessageArg(), + const internal::MessageArg& arg3 = internal::MessageArg(), + const internal::MessageArg& arg4 = internal::MessageArg(), + const internal::MessageArg& arg5 = internal::MessageArg(), + const internal::MessageArg& arg6 = internal::MessageArg()); + + private: + MessageFormatter() {} + DISALLOW_COPY_AND_ASSIGN(MessageFormatter); +}; + +} // namespace i18n +} // namespace base + +#endif // BASE_I18N_MESSAGE_FORMATTER_H_ diff --git a/base/i18n/message_formatter_unittest.cc b/base/i18n/message_formatter_unittest.cc new file mode 100644 index 0000000..85e2e17 --- /dev/null +++ b/base/i18n/message_formatter_unittest.cc @@ -0,0 +1,180 @@ +// Copyright 2015 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "base/i18n/message_formatter.h" + +#include "base/i18n/rtl.h" +#include "base/memory/scoped_ptr.h" +#include "base/strings/string_piece.h" +#include "base/strings/string_util.h" +#include "base/strings/utf_string_conversions.h" +#include "base/time/time.h" +#include "testing/gtest/include/gtest/gtest.h" +#include "third_party/icu/source/common/unicode/unistr.h" +#include "third_party/icu/source/i18n/unicode/datefmt.h" +#include "third_party/icu/source/i18n/unicode/msgfmt.h" + +typedef testing::Test MessageFormatterTest; + +namespace base { +namespace i18n { + +class MessageFormatterTest : public testing::Test { + protected: + MessageFormatterTest() { + original_locale_ = GetConfiguredLocale(); + SetICUDefaultLocale("en-US"); + } + ~MessageFormatterTest() override { + SetICUDefaultLocale(original_locale_); + } + + private: + std::string original_locale_; +}; + +namespace { + +void AppendFormattedDateTime(const scoped_ptr<icu::DateFormat>& df, + const Time& now, std::string* result) { + icu::UnicodeString formatted; + df->format(static_cast<UDate>(now.ToJsTime()), formatted). 
+ toUTF8String(*result); +} + +} // namespace + +TEST_F(MessageFormatterTest, PluralNamedArgs) { + const string16 pattern = ASCIIToUTF16( + "{num_people, plural, " + "=0 {I met nobody in {place}.}" + "=1 {I met a person in {place}.}" + "other {I met # people in {place}.}}"); + + std::string result = UTF16ToASCII(MessageFormatter::FormatWithNamedArgs( + pattern, "num_people", 0, "place", "Paris")); + EXPECT_EQ("I met nobody in Paris.", result); + result = UTF16ToASCII(MessageFormatter::FormatWithNamedArgs( + pattern, "num_people", 1, "place", "Paris")); + EXPECT_EQ("I met a person in Paris.", result); + result = UTF16ToASCII(MessageFormatter::FormatWithNamedArgs( + pattern, "num_people", 5, "place", "Paris")); + EXPECT_EQ("I met 5 people in Paris.", result); +} + +TEST_F(MessageFormatterTest, PluralNamedArgsWithOffset) { + const string16 pattern = ASCIIToUTF16( + "{num_people, plural, offset:1 " + "=0 {I met nobody in {place}.}" + "=1 {I met {person} in {place}.}" + "=2 {I met {person} and one other person in {place}.}" + "=13 {I met {person} and a dozen other people in {place}.}" + "other {I met {person} and # other people in {place}.}}"); + + std::string result = UTF16ToASCII(MessageFormatter::FormatWithNamedArgs( + pattern, "num_people", 0, "place", "Paris")); + EXPECT_EQ("I met nobody in Paris.", result); + // {person} is ignored if {num_people} is 0. 
+ result = UTF16ToASCII(MessageFormatter::FormatWithNamedArgs( + pattern, "num_people", 0, "place", "Paris", "person", "Peter")); + EXPECT_EQ("I met nobody in Paris.", result); + result = UTF16ToASCII(MessageFormatter::FormatWithNamedArgs( + pattern, "num_people", 1, "place", "Paris", "person", "Peter")); + EXPECT_EQ("I met Peter in Paris.", result); + result = UTF16ToASCII(MessageFormatter::FormatWithNamedArgs( + pattern, "num_people", 2, "place", "Paris", "person", "Peter")); + EXPECT_EQ("I met Peter and one other person in Paris.", result); + result = UTF16ToASCII(MessageFormatter::FormatWithNamedArgs( + pattern, "num_people", 13, "place", "Paris", "person", "Peter")); + EXPECT_EQ("I met Peter and a dozen other people in Paris.", result); + result = UTF16ToASCII(MessageFormatter::FormatWithNamedArgs( + pattern, "num_people", 50, "place", "Paris", "person", "Peter")); + EXPECT_EQ("I met Peter and 49 other people in Paris.", result); +} + +TEST_F(MessageFormatterTest, PluralNumberedArgs) { + const string16 pattern = ASCIIToUTF16( + "{1, plural, " + "=1 {The cert for {0} expired yesterday.}" + "=7 {The cert for {0} expired a week ago.}" + "other {The cert for {0} expired # days ago.}}"); + + std::string result = UTF16ToASCII(MessageFormatter::FormatWithNumberedArgs( + pattern, "example.com", 1)); + EXPECT_EQ("The cert for example.com expired yesterday.", result); + result = UTF16ToASCII(MessageFormatter::FormatWithNumberedArgs( + pattern, "example.com", 7)); + EXPECT_EQ("The cert for example.com expired a week ago.", result); + result = UTF16ToASCII(MessageFormatter::FormatWithNumberedArgs( + pattern, "example.com", 15)); + EXPECT_EQ("The cert for example.com expired 15 days ago.", result); +} + +TEST_F(MessageFormatterTest, PluralNumberedArgsWithDate) { + const string16 pattern = ASCIIToUTF16( + "{1, plural, " + "=1 {The cert for {0} expired yesterday. Today is {2,date,full}}" + "other {The cert for {0} expired # days ago. 
Today is {2,date,full}}}"); + + base::Time now = base::Time::Now(); + using icu::DateFormat; + scoped_ptr<DateFormat> df(DateFormat::createDateInstance(DateFormat::FULL)); + std::string second_sentence = " Today is "; + AppendFormattedDateTime(df, now, &second_sentence); + + std::string result = UTF16ToASCII(MessageFormatter::FormatWithNumberedArgs( + pattern, "example.com", 1, now)); + EXPECT_EQ("The cert for example.com expired yesterday." + second_sentence, + result); + result = UTF16ToASCII(MessageFormatter::FormatWithNumberedArgs( + pattern, "example.com", 15, now)); + EXPECT_EQ("The cert for example.com expired 15 days ago." + second_sentence, + result); +} + +TEST_F(MessageFormatterTest, DateTimeAndNumber) { + // Note that using 'mph' for all locales is not a good i18n practice. + const string16 pattern = ASCIIToUTF16( + "At {0,time, short} on {0,date, medium}, " + "there was {1} at building {2,number,integer}. " + "The speed of the wind was {3,number,###.#} mph."); + + using icu::DateFormat; + scoped_ptr<DateFormat> tf(DateFormat::createTimeInstance(DateFormat::SHORT)); + scoped_ptr<DateFormat> df(DateFormat::createDateInstance(DateFormat::MEDIUM)); + + base::Time now = base::Time::Now(); + std::string expected = "At "; + AppendFormattedDateTime(tf, now, &expected); + expected.append(" on "); + AppendFormattedDateTime(df, now, &expected); + expected.append(", there was an explosion at building 3. 
" + "The speed of the wind was 37.4 mph."); + + EXPECT_EQ(expected, UTF16ToASCII(MessageFormatter::FormatWithNumberedArgs( + pattern, now, "an explosion", 3, 37.413))); +} + +TEST_F(MessageFormatterTest, SelectorSingleOrMultiple) { + const string16 pattern = ASCIIToUTF16( + "{0, select," + "single {Select a file to upload.}" + "multiple {Select files to upload.}" + "other {UNUSED}}"); + + std::string result = UTF16ToASCII(MessageFormatter::FormatWithNumberedArgs( + pattern, "single")); + EXPECT_EQ("Select a file to upload.", result); + result = UTF16ToASCII(MessageFormatter::FormatWithNumberedArgs( + pattern, "multiple")); + EXPECT_EQ("Select files to upload.", result); + + // fallback if a parameter is not selectors specified in the message pattern. + result = UTF16ToASCII(MessageFormatter::FormatWithNumberedArgs( + pattern, "foobar")); + EXPECT_EQ("UNUSED", result); +} + +} // namespace i18n +} // namespace base |