diff options
author | hajimehoshi <hajimehoshi@chromium.org> | 2015-08-03 00:21:42 -0700 |
---|---|---|
committer | Commit bot <commit-bot@chromium.org> | 2015-08-03 07:22:27 +0000 |
commit | 92a5d22913c2ac3082d57d4eefa9911961588b41 (patch) | |
tree | be38e50a904f0e7f6c74d3be5d3ba5cb908af038 /components | |
parent | cecb9e1d9bfb0bf198fc04a8b312b375b2dd8556 (diff) | |
download | chromium_src-92a5d22913c2ac3082d57d4eefa9911961588b41.zip chromium_src-92a5d22913c2ac3082d57d4eefa9911961588b41.tar.gz chromium_src-92a5d22913c2ac3082d57d4eefa9911961588b41.tar.bz2 |
Remove the Finch test 'CLD1VsCLD2'
The Finch test 'CDL1VsCLD2' is a test to make sure that CLD2 is not
worse than CLD1 in terms of performance or accuracy. This test is no
longer executed on any platforms. Let's remove this for code health.
CLD version '0' is used to represent to use the Finch trial, and this
CL removes the cases of version 0 from gn, gypi and C++ files.
BUG=515358
Review URL: https://codereview.chromium.org/1259883007
Cr-Commit-Position: refs/heads/master@{#341496}
Diffstat (limited to 'components')
-rw-r--r-- | components/components_tests.gyp | 2 | ||||
-rw-r--r-- | components/translate.gypi | 4 | ||||
-rw-r--r-- | components/translate/content/renderer/BUILD.gn | 2 | ||||
-rw-r--r-- | components/translate/core/language_detection/BUILD.gn | 4 | ||||
-rw-r--r-- | components/translate/core/language_detection/language_detection_util.cc | 134 |
5 files changed, 57 insertions, 89 deletions
diff --git a/components/components_tests.gyp b/components/components_tests.gyp index 75943fb..018fc1a 100644 --- a/components/components_tests.gyp +++ b/components/components_tests.gyp @@ -932,7 +932,7 @@ '<(DEPTH)/base/allocator/allocator.gyp:allocator', ], }], - [ 'cld_version==0 or cld_version==2', { + [ 'cld_version==2', { 'dependencies': [ # Unit tests should always use statically-linked CLD data. '<(DEPTH)/third_party/cld_2/cld_2.gyp:cld2_static', ], diff --git a/components/translate.gypi b/components/translate.gypi index c003e3e..066bad0 100644 --- a/components/translate.gypi +++ b/components/translate.gypi @@ -113,7 +113,7 @@ '<(DEPTH)/third_party/cld/cld.gyp:cld', ], }], - ['cld_version==0 or cld_version==2', { + ['cld_version==2', { 'dependencies': [ '<(DEPTH)/third_party/cld_2/cld_2.gyp:cld_2', ], @@ -207,7 +207,7 @@ 'translate/content/renderer/translate_helper.h', ], 'conditions': [ - ['cld_version==0 or cld_version==2', { + ['cld_version==2', { 'dependencies': [ '<(DEPTH)/third_party/cld_2/cld_2.gyp:cld_2', ], diff --git a/components/translate/content/renderer/BUILD.gn b/components/translate/content/renderer/BUILD.gn index 3a82f2a..1ce88da 100644 --- a/components/translate/content/renderer/BUILD.gn +++ b/components/translate/content/renderer/BUILD.gn @@ -31,7 +31,7 @@ static_library("renderer") { "//v8", ] - if (cld_version == 0 || cld_version == 2) { + if (cld_version == 2) { deps += [ "//third_party/cld_2" ] } } diff --git a/components/translate/core/language_detection/BUILD.gn b/components/translate/core/language_detection/BUILD.gn index f704844..57e1d10 100644 --- a/components/translate/core/language_detection/BUILD.gn +++ b/components/translate/core/language_detection/BUILD.gn @@ -16,10 +16,10 @@ static_library("language_detection") { "//url", ] - if (cld_version == 0 || cld_version == 1) { + if (cld_version == 1) { deps += [ "//third_party/cld" ] } - if (cld_version == 0 || cld_version == 2) { + if (cld_version == 2) { deps += [ "//third_party/cld_2" ] } } diff --git a/components/translate/core/language_detection/language_detection_util.cc b/components/translate/core/language_detection/language_detection_util.cc index 5c751a7..4ab86fc 100644 --- a/components/translate/core/language_detection/language_detection_util.cc +++ b/components/translate/core/language_detection/language_detection_util.cc @@ -5,7 +5,6 @@ #include "components/translate/core/language_detection/language_detection_util.h" #include "base/logging.h" -#include "base/metrics/field_trial.h" #include "base/strings/string_split.h" #include "base/strings/string_util.h" #include "base/strings/utf_string_conversions.h" @@ -14,12 +13,12 @@ #include "components/translate/core/common/translate_metrics.h" #include "components/translate/core/common/translate_util.h" -#if !defined(CLD_VERSION) || CLD_VERSION==1 +#if CLD_VERSION==1 #include "third_party/cld/encodings/compact_lang_det/compact_lang_det.h" #include "third_party/cld/encodings/compact_lang_det/win/cld_unicodetext.h" #endif -#if !defined(CLD_VERSION) || CLD_VERSION==2 +#if CLD_VERSION==2 #include "third_party/cld_2/src/public/compact_lang_det.h" #endif @@ -70,18 +69,6 @@ void ApplyLanguageCodeCorrection(std::string* code) { translate::ToTranslateLanguageSynonym(code); } -int GetCLDMajorVersion() { -#if !defined(CLD_VERSION) - std::string group_name = base::FieldTrialList::FindFullName("CLD1VsCLD2"); - if (group_name == "CLD2") - return 2; - else - return 1; -#else - return CLD_VERSION; -#endif -} - // Returns the ISO 639 language code of the specified |text|, or 'unknown' if it // failed. // |is_cld_reliable| will be set as true if CLD says the detection is reliable. @@ -96,45 +83,36 @@ std::string DetermineTextLanguage(const base::string16& text, int cld_language = 0; bool is_valid_language = false; - switch (GetCLDMajorVersion()) { -#if !defined(CLD_VERSION) || CLD_VERSION==1 - case 1: { - int num_languages = 0; - cld_language = DetectLanguageOfUnicodeText( - NULL, text.c_str(), is_plain_text, &is_reliable, &num_languages, NULL, - &num_bytes_evaluated); - is_valid_language = cld_language != NUM_LANGUAGES && - cld_language != UNKNOWN_LANGUAGE && - cld_language != TG_UNKNOWN_LANGUAGE; - break; - } -#endif -#if !defined(CLD_VERSION) || CLD_VERSION==2 - case 2: { - const std::string utf8_text(base::UTF16ToUTF8(text)); - const int num_utf8_bytes = static_cast<int>(utf8_text.size()); - const char* raw_utf8_bytes = utf8_text.c_str(); - cld_language = CLD2::DetectLanguageCheckUTF8( - raw_utf8_bytes, num_utf8_bytes, is_plain_text, &is_reliable, - &num_bytes_evaluated); - - if (num_bytes_evaluated < num_utf8_bytes && - cld_language == CLD2::UNKNOWN_LANGUAGE) { - // Invalid UTF8 encountered, see bug http://crbug.com/444258. - // Retry using only the valid characters. This time the check for valid - // UTF8 can be skipped since the precise number of valid bytes is known. - cld_language = CLD2::DetectLanguage(raw_utf8_bytes, num_bytes_evaluated, - is_plain_text, &is_reliable); - } - is_valid_language = cld_language != CLD2::NUM_LANGUAGES && - cld_language != CLD2::UNKNOWN_LANGUAGE && - cld_language != CLD2::TG_UNKNOWN_LANGUAGE; - break; - } -#endif - default: - NOTREACHED(); +#if CLD_VERSION==1 + int num_languages = 0; + cld_language = DetectLanguageOfUnicodeText( + NULL, text.c_str(), is_plain_text, &is_reliable, &num_languages, NULL, + &num_bytes_evaluated); + is_valid_language = cld_language != NUM_LANGUAGES && + cld_language != UNKNOWN_LANGUAGE && + cld_language != TG_UNKNOWN_LANGUAGE; +#elif CLD_VERSION==2 + const std::string utf8_text(base::UTF16ToUTF8(text)); + const int num_utf8_bytes = static_cast<int>(utf8_text.size()); + const char* raw_utf8_bytes = utf8_text.c_str(); + cld_language = CLD2::DetectLanguageCheckUTF8( + raw_utf8_bytes, num_utf8_bytes, is_plain_text, &is_reliable, + &num_bytes_evaluated); + + if (num_bytes_evaluated < num_utf8_bytes && + cld_language == CLD2::UNKNOWN_LANGUAGE) { + // Invalid UTF8 encountered, see bug http://crbug.com/444258. + // Retry using only the valid characters. This time the check for valid + // UTF8 can be skipped since the precise number of valid bytes is known. + cld_language = CLD2::DetectLanguage(raw_utf8_bytes, num_bytes_evaluated, + is_plain_text, &is_reliable); } + is_valid_language = cld_language != CLD2::NUM_LANGUAGES && + cld_language != CLD2::UNKNOWN_LANGUAGE && + cld_language != CLD2::TG_UNKNOWN_LANGUAGE; +#else +# error "CLD_VERSION must be 1 or 2" +#endif if (is_cld_reliable != NULL) *is_cld_reliable = is_reliable; @@ -152,37 +130,27 @@ std::string DetermineTextLanguage(const base::string16& text, // |LanguageCodeWithDialect| will go through ISO 639-1, ISO-639-2 and // 'other' tables to do the 'right' thing. In addition, it'll return zh-CN // for Simplified Chinese. - switch (GetCLDMajorVersion()) { -#if !defined(CLD_VERSION) || CLD_VERSION==1 - case 1: - language = - LanguageCodeWithDialects(static_cast<Language>(cld_language)); - break; -#endif -#if !defined(CLD_VERSION) || CLD_VERSION==2 - case 2: - // (1) CLD2's LanguageCode returns general Chinese 'zh' for - // CLD2::CHINESE, but Translate server doesn't accept it. This is - // converted to 'zh-CN' in the same way as CLD1's - // LanguageCodeWithDialects. - // - // (2) CLD2's LanguageCode returns zh-Hant instead of zh-TW for - // CLD2::CHINESE_T. This is technically more precise for the language - // code of traditional Chinese, while Translate server hasn't accepted - // zh-Hant yet. - if (cld_language == CLD2::CHINESE) { - language = "zh-CN"; - } else if (cld_language == CLD2::CHINESE_T) { - language = "zh-TW"; - } else { - language = - CLD2::LanguageCode(static_cast<CLD2::Language>(cld_language)); - } - break; +#if CLD_VERSION==1 + language = LanguageCodeWithDialects(static_cast<Language>(cld_language)); +#elif CLD_VERSION==2 + // (1) CLD2's LanguageCode returns general Chinese 'zh' for + // CLD2::CHINESE, but Translate server doesn't accept it. This is + // converted to 'zh-CN' in the same way as CLD1's + // LanguageCodeWithDialects. + // + // (2) CLD2's LanguageCode returns zh-Hant instead of zh-TW for + // CLD2::CHINESE_T. This is technically more precise for the language + // code of traditional Chinese, while Translate server hasn't accepted + // zh-Hant yet. + if (cld_language == CLD2::CHINESE) + language = "zh-CN"; + else if (cld_language == CLD2::CHINESE_T) + language = "zh-TW"; + else + language = CLD2::LanguageCode(static_cast<CLD2::Language>(cld_language)); +#else +# error "CLD_VERSION must be 1 or 2" #endif - default: - NOTREACHED(); - } } VLOG(9) << "Detected lang_id: " << language << ", from Text:\n" << text << "\n*************************************\n"; |