summaryrefslogtreecommitdiffstats
path: root/base
diff options
context:
space:
mode:
authorjshin <jshin@chromium.org>2015-08-07 03:11:09 -0700
committerCommit bot <commit-bot@chromium.org>2015-08-07 10:11:42 +0000
commit8b581d8b638951f98c0fb0c0116ac18b355b825e (patch)
tree58623cbfa64540aca8cb288b29bf8a0efbb2327b /base
parent3bc71b8291a88cc35c21d05d9fb99787cccfddf5 (diff)
downloadchromium_src-8b581d8b638951f98c0fb0c0116ac18b355b825e.zip
chromium_src-8b581d8b638951f98c0fb0c0116ac18b355b825e.tar.gz
chromium_src-8b581d8b638951f98c0fb0c0116ac18b355b825e.tar.bz2
Add ICU message format support
Adopt and customize an ICU message format wrapper used at Google to meet Chromium's needs. This will enable formatting of 'complex messages' requiring plural and/or selector (e.g. gender or 'single vs multiple') support with more than one parameter. Besides, l10n_util::GetPluralStringF* is rewritten to use this API. I'm also planning to use this API to add similar support to Chromium's JavaScript-based UI and extensions. References: MessageFormat specs: http://icu-project.org/apiref/icu4j/com/ibm/icu/text/MessageFormat.html http://icu-project.org/apiref/icu4c/classicu_1_1DecimalFormat.html#details Examples: http://userguide.icu-project.org/formatparse/messages message_formatter_unittest.cc go/plurals inside Google. BUG=481734 TEST=base_unittests --gtest_filter="MessageFormat*" Review URL: https://codereview.chromium.org/1140153005 Cr-Commit-Position: refs/heads/master@{#342327}
Diffstat (limited to 'base')
-rw-r--r--base/BUILD.gn7
-rw-r--r--base/base.gyp3
-rw-r--r--base/base.gypi2
-rw-r--r--base/i18n/message_formatter.cc141
-rw-r--r--base/i18n/message_formatter.h111
-rw-r--r--base/i18n/message_formatter_unittest.cc180
6 files changed, 443 insertions, 1 deletions
diff --git a/base/BUILD.gn b/base/BUILD.gn
index b8ae3a9..6f364a9 100644
--- a/base/BUILD.gn
+++ b/base/BUILD.gn
@@ -987,6 +987,8 @@ component("i18n") {
"i18n/icu_string_conversions.h",
"i18n/icu_util.cc",
"i18n/icu_util.h",
+ "i18n/message_formatter.cc",
+ "i18n/message_formatter.h",
"i18n/number_formatting.cc",
"i18n/number_formatting.h",
"i18n/rtl.cc",
@@ -1006,10 +1008,12 @@ component("i18n") {
]
defines = [ "BASE_I18N_IMPLEMENTATION" ]
configs += [ "//build/config/compiler:wexit_time_destructors" ]
+ public_deps = [
+ "//third_party/icu",
+ ]
deps = [
":base",
"//base/third_party/dynamic_annotations",
- "//third_party/icu",
]
if (!is_debug) {
@@ -1261,6 +1265,7 @@ test("base_unittests") {
"i18n/char_iterator_unittest.cc",
"i18n/file_util_icu_unittest.cc",
"i18n/icu_string_conversions_unittest.cc",
+ "i18n/message_formatter_unittest.cc",
"i18n/number_formatting_unittest.cc",
"i18n/rtl_unittest.cc",
"i18n/streaming_utf8_validator_unittest.cc",
diff --git a/base/base.gyp b/base/base.gyp
index e9b0995..4a558d8 100644
--- a/base/base.gyp
+++ b/base/base.gyp
@@ -288,6 +288,8 @@
],
'export_dependent_settings': [
'base',
+ '../third_party/icu/icu.gyp:icuuc',
+ '../third_party/icu/icu.gyp:icui18n',
],
'includes': [
'../build/android/increase_size_for_speed.gypi',
@@ -483,6 +485,7 @@
'i18n/char_iterator_unittest.cc',
'i18n/file_util_icu_unittest.cc',
'i18n/icu_string_conversions_unittest.cc',
+ 'i18n/message_formatter_unittest.cc',
'i18n/number_formatting_unittest.cc',
'i18n/rtl_unittest.cc',
'i18n/streaming_utf8_validator_unittest.cc',
diff --git a/base/base.gypi b/base/base.gypi
index c3725a0..cce8e44 100644
--- a/base/base.gypi
+++ b/base/base.gypi
@@ -1018,6 +1018,8 @@
'i18n/icu_string_conversions.h',
'i18n/icu_util.cc',
'i18n/icu_util.h',
+ 'i18n/message_formatter.cc',
+ 'i18n/message_formatter.h',
'i18n/number_formatting.cc',
'i18n/number_formatting.h',
'i18n/rtl.cc',
diff --git a/base/i18n/message_formatter.cc b/base/i18n/message_formatter.cc
new file mode 100644
index 0000000..702e51b
--- /dev/null
+++ b/base/i18n/message_formatter.cc
@@ -0,0 +1,141 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/i18n/message_formatter.h"
+
+#include "base/logging.h"
+#include "base/numerics/safe_conversions.h"
+#include "base/time/time.h"
+#include "third_party/icu/source/common/unicode/unistr.h"
+#include "third_party/icu/source/common/unicode/utypes.h"
+#include "third_party/icu/source/i18n/unicode/fmtable.h"
+#include "third_party/icu/source/i18n/unicode/msgfmt.h"
+
+using icu::UnicodeString;
+
+namespace base {
+namespace i18n {
+namespace {
+// Builds an ICU UnicodeString from the UTF-8 bytes referenced by |str|. The
+// byte count is range-checked by base::checked_cast before being handed to
+// ICU, whose StringPiece takes an int32_t length.
+UnicodeString UnicodeStringFromStringPiece(StringPiece str) {
+  const int32_t utf8_length = base::checked_cast<int32_t>(str.size());
+  const icu::StringPiece utf8_bytes(str.data(), utf8_length);
+  return UnicodeString::fromUTF8(utf8_bytes);
+}
+} // anonymous namespace
+
+namespace internal {
+// A default-constructed MessageArg holds no value (|formattable| is null). It
+// stands in for an argument the caller did not supply.
+MessageArg::MessageArg() : formattable(nullptr) {}
+
+// UTF-8 string arguments. All three overloads go through
+// UnicodeStringFromStringPiece() so the byte length is range-checked before it
+// is narrowed to the int32_t that ICU expects.
+MessageArg::MessageArg(const char* s)
+    : formattable(new icu::Formattable(UnicodeStringFromStringPiece(s))) {}
+
+MessageArg::MessageArg(StringPiece s)
+    : formattable(new icu::Formattable(UnicodeStringFromStringPiece(s))) {}
+
+MessageArg::MessageArg(const std::string& s)
+    : formattable(new icu::Formattable(UnicodeStringFromStringPiece(s))) {}
+
+// UTF-16 string argument. The length is range-checked for the same reason as
+// in the UTF-8 overloads instead of being implicitly narrowed from size_t.
+MessageArg::MessageArg(const string16& s)
+    : formattable(new icu::Formattable(UnicodeString(
+          s.data(), base::checked_cast<int32_t>(s.size())))) {}
+
+// Numeric arguments.
+MessageArg::MessageArg(int i) : formattable(new icu::Formattable(i)) {}
+
+MessageArg::MessageArg(int64_t i) : formattable(new icu::Formattable(i)) {}
+
+MessageArg::MessageArg(double d) : formattable(new icu::Formattable(d)) {}
+
+// Time argument, stored as the value of Time::ToJsTime() cast to ICU's UDate.
+MessageArg::MessageArg(const Time& t)
+    : formattable(new icu::Formattable(static_cast<UDate>(t.ToJsTime()))) {}
+
+MessageArg::~MessageArg() {}
+
+// Reports whether this argument carries a value. As a side effect, *count is
+// incremented when it does; *count is left untouched otherwise.
+bool MessageArg::has_value(int* count) const {
+  const bool is_empty = (formattable == nullptr);
+  if (!is_empty)
+    ++*count;
+  return !is_empty;
+}
+
+} // namespace internal
+
+// Formats |msg|, an ICU MessageFormat pattern whose arguments are referenced
+// by position ({0}, {1}, ...), with up to seven arguments. Returns the
+// formatted text, or an empty string (after logging an error) when ICU reports
+// a failure.
+string16 MessageFormatter::FormatWithNumberedArgs(
+    StringPiece16 msg,
+    const internal::MessageArg& arg0,
+    const internal::MessageArg& arg1,
+    const internal::MessageArg& arg2,
+    const internal::MessageArg& arg3,
+    const internal::MessageArg& arg4,
+    const internal::MessageArg& arg5,
+    const internal::MessageArg& arg6) {
+  // has_value() bumps |args_count| once for every argument that holds a value.
+  // Arguments without a value are replaced by empty Formattables so that the
+  // array is fully initialized.
+  int32_t args_count = 0;
+  icu::Formattable args[] = {
+      arg0.has_value(&args_count) ? *arg0.formattable : icu::Formattable(),
+      arg1.has_value(&args_count) ? *arg1.formattable : icu::Formattable(),
+      arg2.has_value(&args_count) ? *arg2.formattable : icu::Formattable(),
+      arg3.has_value(&args_count) ? *arg3.formattable : icu::Formattable(),
+      arg4.has_value(&args_count) ? *arg4.formattable : icu::Formattable(),
+      arg5.has_value(&args_count) ? *arg5.formattable : icu::Formattable(),
+      arg6.has_value(&args_count) ? *arg6.formattable : icu::Formattable(),
+  };
+
+  // Range-check the length instead of silently narrowing size_t to int32_t,
+  // matching UnicodeStringFromStringPiece().
+  UnicodeString msg_string(msg.data(),
+                           base::checked_cast<int32_t>(msg.size()));
+  UErrorCode error = U_ZERO_ERROR;
+  icu::MessageFormat format(msg_string, error);
+  icu::UnicodeString formatted;
+  icu::FieldPosition ignore(icu::FieldPosition::DONT_CARE);
+  // |error| is shared by the constructor and format(), so the single check
+  // below reports a failure from either step (per ICU's UErrorCode
+  // convention, a call made with a failing code does nothing).
+  format.format(args, args_count, formatted, ignore, error);
+  if (U_FAILURE(error)) {
+    LOG(ERROR) << "MessageFormat(" << msg.as_string() << ") failed with "
+               << u_errorName(error);
+    return string16();
+  }
+  return string16(formatted.getBuffer(), formatted.length());
+}
+
+// Formats |msg|, an ICU MessageFormat pattern whose arguments are referenced
+// by name ({place}, {num_people}, ...). |nameN| is the UTF-8 name that |argN|
+// is bound to; up to seven name/argument pairs are accepted. Returns the
+// formatted text, or an empty string (after logging an error) when ICU reports
+// a failure.
+string16 MessageFormatter::FormatWithNamedArgs(
+    StringPiece16 msg,
+    StringPiece name0, const internal::MessageArg& arg0,
+    StringPiece name1, const internal::MessageArg& arg1,
+    StringPiece name2, const internal::MessageArg& arg2,
+    StringPiece name3, const internal::MessageArg& arg3,
+    StringPiece name4, const internal::MessageArg& arg4,
+    StringPiece name5, const internal::MessageArg& arg5,
+    StringPiece name6, const internal::MessageArg& arg6) {
+  // names[i] pairs with args[i]. Only the first |args_count| entries of each
+  // array are handed to ICU below.
+  icu::UnicodeString names[] = {
+      UnicodeStringFromStringPiece(name0),
+      UnicodeStringFromStringPiece(name1),
+      UnicodeStringFromStringPiece(name2),
+      UnicodeStringFromStringPiece(name3),
+      UnicodeStringFromStringPiece(name4),
+      UnicodeStringFromStringPiece(name5),
+      UnicodeStringFromStringPiece(name6),
+  };
+  // has_value() bumps |args_count| once for every argument that holds a value.
+  // Arguments without a value are replaced by empty Formattables so that the
+  // array is fully initialized.
+  int32_t args_count = 0;
+  icu::Formattable args[] = {
+      arg0.has_value(&args_count) ? *arg0.formattable : icu::Formattable(),
+      arg1.has_value(&args_count) ? *arg1.formattable : icu::Formattable(),
+      arg2.has_value(&args_count) ? *arg2.formattable : icu::Formattable(),
+      arg3.has_value(&args_count) ? *arg3.formattable : icu::Formattable(),
+      arg4.has_value(&args_count) ? *arg4.formattable : icu::Formattable(),
+      arg5.has_value(&args_count) ? *arg5.formattable : icu::Formattable(),
+      arg6.has_value(&args_count) ? *arg6.formattable : icu::Formattable(),
+  };
+
+  // Range-check the length instead of silently narrowing size_t to int32_t,
+  // matching UnicodeStringFromStringPiece().
+  UnicodeString msg_string(msg.data(),
+                           base::checked_cast<int32_t>(msg.size()));
+  UErrorCode error = U_ZERO_ERROR;
+  icu::MessageFormat format(msg_string, error);
+
+  icu::UnicodeString formatted;
+  // |error| is shared by the constructor and format(), so the single check
+  // below reports a failure from either step (per ICU's UErrorCode
+  // convention, a call made with a failing code does nothing).
+  format.format(names, args, args_count, formatted, error);
+  if (U_FAILURE(error)) {
+    LOG(ERROR) << "MessageFormat(" << msg.as_string() << ") failed with "
+               << u_errorName(error);
+    return string16();
+  }
+  return string16(formatted.getBuffer(), formatted.length());
+}
+
+} // namespace i18n
+} // namespace base
diff --git a/base/i18n/message_formatter.h b/base/i18n/message_formatter.h
new file mode 100644
index 0000000..bcdc3bc
--- /dev/null
+++ b/base/i18n/message_formatter.h
@@ -0,0 +1,111 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef BASE_I18N_MESSAGE_FORMATTER_H_
+#define BASE_I18N_MESSAGE_FORMATTER_H_
+
+#include <stdint.h>
+#include <string>
+
+#include "base/i18n/base_i18n_export.h"
+#include "base/memory/scoped_ptr.h"
+#include "base/strings/string16.h"
+#include "base/strings/string_piece.h"
+#include "third_party/icu/source/common/unicode/uversion.h"
+
+U_NAMESPACE_BEGIN
+class Formattable;
+U_NAMESPACE_END
+
+namespace base {
+
+class Time;
+
+namespace i18n {
+
+class MessageFormatter;
+
+namespace internal {
+
+// Holder for a single message argument, wrapping an icu::Formattable. The
+// converting constructors are not explicit, which lets callers pass strings,
+// numbers and base::Time values directly to
+// MessageFormatter::FormatWith{Named,Numbered}Args().
+class BASE_I18N_EXPORT MessageArg {
+ public:
+  // Numbers.
+  MessageArg(int i);
+  MessageArg(int64_t i);
+  MessageArg(double d);
+
+  // UTF-8 strings.
+  MessageArg(const char* s);
+  MessageArg(StringPiece s);
+  MessageArg(const std::string& s);
+
+  // UTF-16 string.
+  MessageArg(const string16& s);
+
+  // Point in time.
+  MessageArg(const Time& t);
+
+  ~MessageArg();
+
+ private:
+  friend class base::i18n::MessageFormatter;
+
+  // Creates an argument without a value. Being private, it is reachable only
+  // from this class and from MessageFormatter, which uses it for the default
+  // values of its optional parameters.
+  MessageArg();
+
+  // Returns true if this argument has a value, in which case *count is also
+  // incremented.
+  bool has_value(int* count) const;
+
+  // Null when the argument has no value.
+  scoped_ptr<icu::Formattable> formattable;
+
+  DISALLOW_COPY_AND_ASSIGN(MessageArg);
+};
+
+} // namespace internal
+
+// Message Formatter with the ICU message format syntax support.
+// It can format strings (UTF-8 and UTF-16), numbers and base::Time with
+// plural, gender and other 'selectors' support. This is handy if you
+// have multiple parameters of different types and some of them require
+// plural or gender/selector support.
+//
+// To use this API for locale-sensitive formatting, retrieve a 'message
+// template' in the ICU message format from a message bundle (e.g. with
+// l10n_util::GetStringUTF16()) and pass it to FormatWith{Named,Numbered}Args.
+//
+// MessageFormat specs:
+// http://icu-project.org/apiref/icu4j/com/ibm/icu/text/MessageFormat.html
+// http://icu-project.org/apiref/icu4c/classicu_1_1DecimalFormat.html#details
+// Examples:
+// http://userguide.icu-project.org/formatparse/messages
+// message_formatter_unittest.cc
+// go/plurals inside Google.
+// TODO(jshin): Document this API at sites.chromium.org and add a reference
+// here.
+
+class BASE_I18N_EXPORT MessageFormatter {
+ public:
+  // Formats |msg|, a pattern in the ICU message format syntax whose arguments
+  // are referenced by position ({0}, {1}, ...). Up to seven arguments can be
+  // passed; trailing ones may be omitted.
+  static string16 FormatWithNumberedArgs(
+      StringPiece16 msg,
+      const internal::MessageArg& arg0 = internal::MessageArg(),
+      const internal::MessageArg& arg1 = internal::MessageArg(),
+      const internal::MessageArg& arg2 = internal::MessageArg(),
+      const internal::MessageArg& arg3 = internal::MessageArg(),
+      const internal::MessageArg& arg4 = internal::MessageArg(),
+      const internal::MessageArg& arg5 = internal::MessageArg(),
+      const internal::MessageArg& arg6 = internal::MessageArg());
+
+  // Formats |msg|, a pattern in the ICU message format syntax whose arguments
+  // are referenced by name. Each |nameN| is the name of the |argN| that
+  // follows it. Up to seven name/argument pairs can be passed; trailing ones
+  // may be omitted.
+  static string16 FormatWithNamedArgs(
+      StringPiece16 msg,
+      StringPiece name0 = StringPiece(),
+      const internal::MessageArg& arg0 = internal::MessageArg(),
+      StringPiece name1 = StringPiece(),
+      const internal::MessageArg& arg1 = internal::MessageArg(),
+      StringPiece name2 = StringPiece(),
+      const internal::MessageArg& arg2 = internal::MessageArg(),
+      StringPiece name3 = StringPiece(),
+      const internal::MessageArg& arg3 = internal::MessageArg(),
+      StringPiece name4 = StringPiece(),
+      const internal::MessageArg& arg4 = internal::MessageArg(),
+      StringPiece name5 = StringPiece(),
+      const internal::MessageArg& arg5 = internal::MessageArg(),
+      StringPiece name6 = StringPiece(),
+      const internal::MessageArg& arg6 = internal::MessageArg());
+
+ private:
+  // Only static members are exposed; the private constructor keeps outside
+  // code from instantiating the class.
+  MessageFormatter() {}
+  DISALLOW_COPY_AND_ASSIGN(MessageFormatter);
+};
+
+} // namespace i18n
+} // namespace base
+
+#endif // BASE_I18N_MESSAGE_FORMATTER_H_
diff --git a/base/i18n/message_formatter_unittest.cc b/base/i18n/message_formatter_unittest.cc
new file mode 100644
index 0000000..85e2e17
--- /dev/null
+++ b/base/i18n/message_formatter_unittest.cc
@@ -0,0 +1,180 @@
+// Copyright 2015 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/i18n/message_formatter.h"
+
+#include "base/i18n/rtl.h"
+#include "base/memory/scoped_ptr.h"
+#include "base/strings/string_piece.h"
+#include "base/strings/string_util.h"
+#include "base/strings/utf_string_conversions.h"
+#include "base/time/time.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "third_party/icu/source/common/unicode/unistr.h"
+#include "third_party/icu/source/i18n/unicode/datefmt.h"
+#include "third_party/icu/source/i18n/unicode/msgfmt.h"
+
+typedef testing::Test MessageFormatterTest;  // NOTE(review): appears unused; the TEST_Fs below presumably bind to the base::i18n::MessageFormatterTest fixture instead. Confirm and remove.
+
+namespace base {
+namespace i18n {
+
+// Fixture that switches ICU's default locale to en-US for the duration of a
+// test and puts the previously configured locale back afterwards.
+class MessageFormatterTest : public testing::Test {
+ protected:
+  MessageFormatterTest() : original_locale_(GetConfiguredLocale()) {
+    SetICUDefaultLocale("en-US");
+  }
+
+  ~MessageFormatterTest() override { SetICUDefaultLocale(original_locale_); }
+
+ private:
+  // Locale that was configured before the fixture changed it.
+  std::string original_locale_;
+};
+
+namespace {
+
+// Formats |now| with |df| and appends the result, converted to UTF-8, to
+// |*result|.
+void AppendFormattedDateTime(const scoped_ptr<icu::DateFormat>& df,
+                             const Time& now, std::string* result) {
+  const UDate when = static_cast<UDate>(now.ToJsTime());
+  icu::UnicodeString formatted;
+  df->format(when, formatted);
+  formatted.toUTF8String(*result);
+}
+
+} // namespace
+
+// Plural selection driven by a named argument, with a second named argument
+// substituted inside each branch.
+TEST_F(MessageFormatterTest, PluralNamedArgs) {
+  const string16 pattern = ASCIIToUTF16(
+      "{num_people, plural, "
+      "=0 {I met nobody in {place}.}"
+      "=1 {I met a person in {place}.}"
+      "other {I met # people in {place}.}}");
+
+  const struct {
+    int num_people;
+    const char* expected;
+  } kCases[] = {
+      {0, "I met nobody in Paris."},
+      {1, "I met a person in Paris."},
+      {5, "I met 5 people in Paris."},
+  };
+
+  for (const auto& test_case : kCases) {
+    EXPECT_EQ(test_case.expected,
+              UTF16ToASCII(MessageFormatter::FormatWithNamedArgs(
+                  pattern, "num_people", test_case.num_people, "place",
+                  "Paris")));
+  }
+}
+
+// Plural selection with "offset:1": explicit "=N" branches match the raw
+// value, while '#' in the "other" branch shows the value minus the offset.
+TEST_F(MessageFormatterTest, PluralNamedArgsWithOffset) {
+  const string16 pattern = ASCIIToUTF16(
+      "{num_people, plural, offset:1 "
+      "=0 {I met nobody in {place}.}"
+      "=1 {I met {person} in {place}.}"
+      "=2 {I met {person} and one other person in {place}.}"
+      "=13 {I met {person} and a dozen other people in {place}.}"
+      "other {I met {person} and # other people in {place}.}}");
+
+  // The "=0" branch does not mention {person}, so it can be left out.
+  EXPECT_EQ("I met nobody in Paris.",
+            UTF16ToASCII(MessageFormatter::FormatWithNamedArgs(
+                pattern, "num_people", 0, "place", "Paris")));
+
+  const struct {
+    int num_people;
+    const char* expected;
+  } kCasesWithPerson[] = {
+      // {person} is ignored if {num_people} is 0.
+      {0, "I met nobody in Paris."},
+      {1, "I met Peter in Paris."},
+      {2, "I met Peter and one other person in Paris."},
+      {13, "I met Peter and a dozen other people in Paris."},
+      {50, "I met Peter and 49 other people in Paris."},
+  };
+
+  for (const auto& test_case : kCasesWithPerson) {
+    EXPECT_EQ(test_case.expected,
+              UTF16ToASCII(MessageFormatter::FormatWithNamedArgs(
+                  pattern, "num_people", test_case.num_people, "place",
+                  "Paris", "person", "Peter")));
+  }
+}
+
+// Plural selection on positional argument {1}, with {0} substituted inside
+// each branch.
+TEST_F(MessageFormatterTest, PluralNumberedArgs) {
+  const string16 pattern = ASCIIToUTF16(
+      "{1, plural, "
+      "=1 {The cert for {0} expired yesterday.}"
+      "=7 {The cert for {0} expired a week ago.}"
+      "other {The cert for {0} expired # days ago.}}");
+
+  const struct {
+    int days;
+    const char* expected;
+  } kCases[] = {
+      {1, "The cert for example.com expired yesterday."},
+      {7, "The cert for example.com expired a week ago."},
+      {15, "The cert for example.com expired 15 days ago."},
+  };
+
+  for (const auto& test_case : kCases) {
+    EXPECT_EQ(test_case.expected,
+              UTF16ToASCII(MessageFormatter::FormatWithNumberedArgs(
+                  pattern, "example.com", test_case.days)));
+  }
+}
+
+// Positional plural selection combined with a {2,date,full} argument. The
+// expected date text is produced with ICU's own full-style date formatter.
+TEST_F(MessageFormatterTest, PluralNumberedArgsWithDate) {
+  const string16 pattern = ASCIIToUTF16(
+      "{1, plural, "
+      "=1 {The cert for {0} expired yesterday. Today is {2,date,full}}"
+      "other {The cert for {0} expired # days ago. Today is {2,date,full}}}");
+
+  const base::Time now = base::Time::Now();
+  using icu::DateFormat;
+  scoped_ptr<DateFormat> full_date(
+      DateFormat::createDateInstance(DateFormat::FULL));
+  std::string date_sentence = " Today is ";
+  AppendFormattedDateTime(full_date, now, &date_sentence);
+
+  EXPECT_EQ("The cert for example.com expired yesterday." + date_sentence,
+            UTF16ToASCII(MessageFormatter::FormatWithNumberedArgs(
+                pattern, "example.com", 1, now)));
+  EXPECT_EQ("The cert for example.com expired 15 days ago." + date_sentence,
+            UTF16ToASCII(MessageFormatter::FormatWithNumberedArgs(
+                pattern, "example.com", 15, now)));
+}
+
+// Mixes time, date, string and number arguments in a single pattern.
+TEST_F(MessageFormatterTest, DateTimeAndNumber) {
+  // Note that using 'mph' for all locales is not a good i18n practice.
+  const string16 pattern = ASCIIToUTF16(
+      "At {0,time, short} on {0,date, medium}, "
+      "there was {1} at building {2,number,integer}. "
+      "The speed of the wind was {3,number,###.#} mph.");
+
+  const base::Time now = base::Time::Now();
+
+  // Build the expected text with the short-time and medium-date styles that
+  // the pattern asks for.
+  using icu::DateFormat;
+  scoped_ptr<DateFormat> time_format(
+      DateFormat::createTimeInstance(DateFormat::SHORT));
+  scoped_ptr<DateFormat> date_format(
+      DateFormat::createDateInstance(DateFormat::MEDIUM));
+  std::string expected("At ");
+  AppendFormattedDateTime(time_format, now, &expected);
+  expected += " on ";
+  AppendFormattedDateTime(date_format, now, &expected);
+  expected +=
+      ", there was an explosion at building 3. "
+      "The speed of the wind was 37.4 mph.";
+
+  const std::string actual =
+      UTF16ToASCII(MessageFormatter::FormatWithNumberedArgs(
+          pattern, now, "an explosion", 3, 37.413));
+  EXPECT_EQ(expected, actual);
+}
+
+// "select" chooses a branch by matching the string argument against the
+// keywords listed in the pattern.
+TEST_F(MessageFormatterTest, SelectorSingleOrMultiple) {
+  const string16 pattern = ASCIIToUTF16(
+      "{0, select,"
+      "single {Select a file to upload.}"
+      "multiple {Select files to upload.}"
+      "other {UNUSED}}");
+
+  const struct {
+    const char* selector;
+    const char* expected;
+  } kCases[] = {
+      {"single", "Select a file to upload."},
+      {"multiple", "Select files to upload."},
+      // Falls back to the "other" branch when the argument matches none of
+      // the selectors listed in the pattern.
+      {"foobar", "UNUSED"},
+  };
+
+  for (const auto& test_case : kCases) {
+    EXPECT_EQ(test_case.expected,
+              UTF16ToASCII(MessageFormatter::FormatWithNumberedArgs(
+                  pattern, test_case.selector)));
+  }
+}
+
+} // namespace i18n
+} // namespace base