// Copyright (c) 2011 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include #include "base/i18n/rtl.h" #include "base/i18n/string_search.h" #include "base/string16.h" #include "base/utf_string_conversions.h" #include "testing/gtest/include/gtest/gtest.h" #include "unicode/usearch.h" namespace base { namespace i18n { // Note on setting default locale for testing: The current default locale on // the Mac trybot is en_US_POSIX, with which primary-level collation strength // string search is case-sensitive, when normally it should be // case-insensitive. In other locales (including en_US which English speakers // in the U.S. use), this search would be case-insensitive as expected. TEST(StringSearchTest, ASCII) { std::string default_locale(uloc_getDefault()); bool locale_is_posix = (default_locale == "en_US_POSIX"); if (locale_is_posix) SetICUDefaultLocale("en_US"); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( ASCIIToUTF16("hello"), ASCIIToUTF16("hello world"))); EXPECT_FALSE(StringSearchIgnoringCaseAndAccents( ASCIIToUTF16("h e l l o"), ASCIIToUTF16("h e l l o"))); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( ASCIIToUTF16("aabaaa"), ASCIIToUTF16("aaabaabaaa"))); EXPECT_FALSE(StringSearchIgnoringCaseAndAccents( ASCIIToUTF16("searching within empty string"), string16())); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( string16(), ASCIIToUTF16("searching for empty string"))); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( ASCIIToUTF16("case insensitivity"), ASCIIToUTF16("CaSe InSeNsItIvItY"))); if (locale_is_posix) SetICUDefaultLocale(default_locale.data()); } TEST(StringSearchTest, UnicodeLocaleIndependent) { // Base characters const string16 e_base = WideToUTF16(L"e"); const string16 E_base = WideToUTF16(L"E"); const string16 a_base = WideToUTF16(L"a"); // Composed characters const string16 e_with_accute_accent = WideToUTF16(L"\u00e9"); const string16 E_with_accute_accent = WideToUTF16(L"\u00c9"); const string16 e_with_grave_accent = WideToUTF16(L"\u00e8"); const string16 E_with_grave_accent = WideToUTF16(L"\u00c8"); const string16 a_with_accute_accent = WideToUTF16(L"\u00e1"); // Decomposed characters const string16 e_with_accute_combining_mark = WideToUTF16(L"e\u0301"); const string16 E_with_accute_combining_mark = WideToUTF16(L"E\u0301"); const string16 e_with_grave_combining_mark = WideToUTF16(L"e\u0300"); const string16 E_with_grave_combining_mark = WideToUTF16(L"E\u0300"); const string16 a_with_accute_combining_mark = WideToUTF16(L"a\u0301"); std::string default_locale(uloc_getDefault()); bool locale_is_posix = (default_locale == "en_US_POSIX"); if (locale_is_posix) SetICUDefaultLocale("en_US"); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( e_base, e_with_accute_accent)); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( e_with_accute_accent, e_base)); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( e_base, e_with_accute_combining_mark)); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( e_with_accute_combining_mark, e_base)); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( e_with_accute_combining_mark, e_with_accute_accent)); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( e_with_accute_accent, e_with_accute_combining_mark)); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( e_with_accute_combining_mark, e_with_grave_combining_mark)); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( e_with_grave_combining_mark, e_with_accute_combining_mark)); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( e_with_accute_combining_mark, e_with_grave_accent)); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( e_with_grave_accent, e_with_accute_combining_mark)); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( E_with_accute_accent, e_with_accute_accent)); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( E_with_grave_accent, e_with_accute_accent)); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( E_with_accute_combining_mark, e_with_grave_accent)); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( E_with_grave_combining_mark, e_with_accute_accent)); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( E_base, e_with_grave_accent)); EXPECT_FALSE(StringSearchIgnoringCaseAndAccents( a_with_accute_accent, e_with_accute_accent)); EXPECT_FALSE(StringSearchIgnoringCaseAndAccents( a_with_accute_combining_mark, e_with_accute_combining_mark)); if (locale_is_posix) SetICUDefaultLocale(default_locale.data()); } TEST(StringSearchTest, UnicodeLocaleDependent) { // Base characters const string16 a_base = WideToUTF16(L"a"); // Composed characters const string16 a_with_ring = WideToUTF16(L"\u00e5"); EXPECT_TRUE(StringSearchIgnoringCaseAndAccents( a_base, a_with_ring)); const char* default_locale = uloc_getDefault(); SetICUDefaultLocale("da"); EXPECT_FALSE(StringSearchIgnoringCaseAndAccents( a_base, a_with_ring)); SetICUDefaultLocale(default_locale); } } // namespace i18n } // namespace base