diff options
author | jshin@chromium.org <jshin@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-01-19 21:32:42 +0000 |
---|---|---|
committer | jshin@chromium.org <jshin@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-01-19 21:32:42 +0000 |
commit | 8dc5a205c581c3adf5aeebf6d4ccb4a9c8176658 (patch) | |
tree | 2e0f321db340f08fd2b9ef93eb2120e3baa68a00 | |
parent | 0192d0455dadef14e37ac526bf9902f142f3f592 (diff) | |
download | chromium_src-8dc5a205c581c3adf5aeebf6d4ccb4a9c8176658.zip chromium_src-8dc5a205c581c3adf5aeebf6d4ccb4a9c8176658.tar.gz chromium_src-8dc5a205c581c3adf5aeebf6d4ccb4a9c8176658.tar.bz2 |
Reland r36541 (which went in without any commit log by some magic). Was reverted in r36550. Now I'm relanding with the full description.
Port back CLD to Linux and Mac by replacing Windows API calls with ICU's equivalent APIs for normalization.
I also fixed bug 23553 (Traditional Chinese is not detected) by calling LanguageCode instead of LanguageCode_ISO_639_1. The latter covers only ISO 639-1, but there are languages detected by CLD that are not covered by ISO 639-1. In that case, ISO 639-2 is used. If even ISO 639-2 does not cover the language (e.g. Traditional Chinese), LanguageCode takes another fallback.
The html file for CLD testing (french_sentence.html) is explicitly labelled with charset=ISO-8859-1.
Original Review: http://codereview.chromium.org/523108
BUG=25206,23553
TEST=1. CLD is built on Linux/Mac
2. The following tests pass:
- unit_tests: Extension*.DetectTabLang* and CompactLangDet*.*
- browser_tests: ExtensionBrowserTest.Toolstrip
3. Install the 'cld extension' in chrome/common/extensions/docs/examples/api/i18n/cld and go to http://news.google.com.tw and 'zh-TW' shows up in the language badge at the upper right (upper-left in he/ar Chrome) corner.
TBR=jcampan
Review URL: http://codereview.chromium.org/545123
TBR=jshin@chromium.org
Review URL: http://codereview.chromium.org/551070
TBR=jshin@chromium.org
Review URL: http://codereview.chromium.org/549091
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@36552 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r-- | build/all.gyp | 2 | ||||
-rw-r--r-- | chrome/browser/extensions/extension_browsertests_misc.cc | 8 | ||||
-rw-r--r-- | chrome/browser/extensions/extension_tabs_module.cc | 5 | ||||
-rwxr-xr-x | chrome/chrome_browser.gypi | 2 | ||||
-rwxr-xr-x | chrome/chrome_renderer.gypi | 6 | ||||
-rwxr-xr-x | chrome/chrome_tests.gypi | 6 | ||||
-rw-r--r-- | chrome/renderer/extensions/extension_api_client_unittest.cc | 2 | ||||
-rw-r--r-- | chrome/renderer/render_view.cc | 23 | ||||
-rw-r--r-- | chrome/renderer/render_view.h | 12 | ||||
-rw-r--r-- | chrome/test/data/extensions/good/Extensions/behllobkkfkfnphdnhnkndlbkcpglgmj/1.0.0.0/french_sentence.html | 1 | ||||
-rw-r--r-- | third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_scopedptr.h | 7 | ||||
-rw-r--r-- | third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_unicodetext.cc | 108 | ||||
-rw-r--r-- | third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_unicodetext.h | 7 | ||||
-rw-r--r-- | third_party/cld/base/string_util.h | 6 | ||||
-rw-r--r-- | third_party/cld/cld.gyp | 238 |
15 files changed, 190 insertions, 243 deletions
diff --git a/build/all.gyp b/build/all.gyp index c063af2..8a0910b 100644 --- a/build/all.gyp +++ b/build/all.gyp @@ -21,6 +21,7 @@ '../testing/gmock.gyp:*', '../testing/gtest.gyp:*', '../third_party/bzip2/bzip2.gyp:*', + '../third_party/cld/cld.gyp:*', '../third_party/codesighs/codesighs.gyp:*', '../third_party/ffmpeg/ffmpeg.gyp:*', '../third_party/icu/icu.gyp:*', @@ -98,7 +99,6 @@ '../sandbox/sandbox.gyp:*', '../third_party/bsdiff/bsdiff.gyp:*', '../third_party/bspatch/bspatch.gyp:*', - '../third_party/cld/cld.gyp:*', '../third_party/gles2_book/gles2_book.gyp:*', '../tools/memory_watcher/memory_watcher.gyp:*', ], diff --git a/chrome/browser/extensions/extension_browsertests_misc.cc b/chrome/browser/extensions/extension_browsertests_misc.cc index ee5935c..d0409aa 100644 --- a/chrome/browser/extensions/extension_browsertests_misc.cc +++ b/chrome/browser/extensions/extension_browsertests_misc.cc @@ -89,23 +89,17 @@ IN_PROC_BROWSER_TEST_F(ExtensionBrowserTest, Toolstrip) { host->render_view_host(), L"", L"testTabsAPI()", &result); EXPECT_TRUE(result); -#if defined(OS_WIN) - // http://crbug.com/29896 - tabs.detectLanguage is Windows only - // Test for compact language detection API. First navigate to a (static) html // file with a French sentence. Then, run the test API in toolstrip1.html to // actually call the language detection API through the existing extension, // and verify that the language returned is indeed French. 
FilePath language_url = extension_test_data_dir.AppendASCII( "french_sentence.html"); - ui_test_utils::NavigateToURL( - browser(), - GURL(language_url.ToWStringHack())); + ui_test_utils::NavigateToURL(browser(), net::FilePathToFileURL(language_url)); ui_test_utils::ExecuteJavaScriptAndExtractBool( host->render_view_host(), L"", L"testTabsLanguageAPI()", &result); EXPECT_TRUE(result); -#endif } IN_PROC_BROWSER_TEST_F(ExtensionBrowserTest, ExtensionViews) { diff --git a/chrome/browser/extensions/extension_tabs_module.cc b/chrome/browser/extensions/extension_tabs_module.cc index 19d2687..8e6bf2f 100644 --- a/chrome/browser/extensions/extension_tabs_module.cc +++ b/chrome/browser/extensions/extension_tabs_module.cc @@ -832,11 +832,6 @@ void CaptureVisibleTabFunction::SendResultFromBitmap( } bool DetectTabLanguageFunction::RunImpl() { - #if !defined(OS_WIN) - error_ = keys::kSupportedInWindowsOnlyError; - return false; - #endif - int tab_id = 0; Browser* browser = NULL; TabContents* contents = NULL; diff --git a/chrome/chrome_browser.gypi b/chrome/chrome_browser.gypi index fb7d2de..666ecf8 100755 --- a/chrome/chrome_browser.gypi +++ b/chrome/chrome_browser.gypi @@ -2124,12 +2124,10 @@ ], 'include_dirs': [ 'third_party/wtl/include', - '../third_party/cld', ], 'dependencies': [ '../gears/gears.gyp:gears', '../google_update/google_update.gyp:google_update', - '../third_party/cld/cld.gyp:cld', '../views/views.gyp:views', 'installer/installer.gyp:installer_util', '<(allocator_target)', diff --git a/chrome/chrome_renderer.gypi b/chrome/chrome_renderer.gypi index baac651..ea95478 100755 --- a/chrome/chrome_renderer.gypi +++ b/chrome/chrome_renderer.gypi @@ -16,6 +16,7 @@ '../printing/printing.gyp:printing', '../skia/skia.gyp:skia', '../third_party/hunspell/hunspell.gyp:hunspell', + '../third_party/cld/cld.gyp:cld', '../third_party/icu/icu.gyp:icui18n', '../third_party/icu/icu.gyp:icuuc', '../third_party/npapi/npapi.gyp:npapi', @@ -27,6 +28,7 @@ ], 'include_dirs': [ '..', + 
'../third_party/cld', ], 'defines': [ '<@(nacl_defines)', @@ -168,12 +170,8 @@ # Windows-specific rules. ['OS=="win"', { 'include_dirs': [ - '../third_party/cld', 'third_party/wtl/include', ], - 'dependencies': [ - '../third_party/cld/cld.gyp:cld', - ], 'conditions': [ ['win_use_allocator_shim==1', { 'dependencies': [ diff --git a/chrome/chrome_tests.gypi b/chrome/chrome_tests.gypi index b56840a..7ed6e6a 100755 --- a/chrome/chrome_tests.gypi +++ b/chrome/chrome_tests.gypi @@ -492,6 +492,7 @@ '../testing/gmock.gyp:gmock', '../testing/gtest.gyp:gtest', '../third_party/bzip2/bzip2.gyp:bzip2', + '../third_party/cld/cld.gyp:cld', '../third_party/icu/icu.gyp:icui18n', '../third_party/icu/icu.gyp:icuuc', '../third_party/libxml/libxml.gyp:libxml', @@ -896,8 +897,6 @@ ], 'sources!': [ 'browser/views/bookmark_context_menu_test.cc', - # Compact Language Detection (cld) is not supported in linux yet. - '../third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/compact_lang_det_unittest_small.cc', ], }], ['OS=="linux" and (toolkit_views==1 or chromeos==1)', { @@ -947,9 +946,6 @@ 'browser/tab_contents/navigation_controller_unittest.cc', 'browser/task_manager_unittest.cc', '../third_party/hunspell/google/hunspell_tests.cc', - - # Compact Language Detection (cld) is not supported in mac yet. 
- '../third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/compact_lang_det_unittest_small.cc', ], # TODO(mark): We really want this for all non-static library targets, # but when we tried to pull it up to the common.gypi level, it broke diff --git a/chrome/renderer/extensions/extension_api_client_unittest.cc b/chrome/renderer/extensions/extension_api_client_unittest.cc index 05c2ddf..39d275f 100644 --- a/chrome/renderer/extensions/extension_api_client_unittest.cc +++ b/chrome/renderer/extensions/extension_api_client_unittest.cc @@ -290,7 +290,6 @@ TEST_F(ExtensionAPIClientTest, GetTab) { "tabs.get", "2"); } -#if defined(OS_WIN) TEST_F(ExtensionAPIClientTest, DetectTabLanguage) { ExpectJsFail("chrome.tabs.detectLanguage(32, function(){}, 20);", "Uncaught Error: Too many arguments."); @@ -306,7 +305,6 @@ TEST_F(ExtensionAPIClientTest, DetectTabLanguage) { ExpectJsPass("chrome.tabs.detectLanguage(null, function(){})", "tabs.detectLanguage", "null"); } -#endif TEST_F(ExtensionAPIClientTest, GetSelectedTab) { ExpectJsFail("chrome.tabs.getSelected(32, function(){}, 20);", diff --git a/chrome/renderer/render_view.cc b/chrome/renderer/render_view.cc index 5b1ee82..dcbda82 100644 --- a/chrome/renderer/render_view.cc +++ b/chrome/renderer/render_view.cc @@ -67,10 +67,7 @@ #include "net/base/net_errors.h" #include "skia/ext/bitmap_platform_device.h" #include "skia/ext/image_operations.h" -#if defined(OS_WIN) -// TODO(port): The compact language detection library works only for Windows. 
#include "third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_unicodetext.h" -#endif #include "third_party/WebKit/WebKit/chromium/public/WebAccessibilityCache.h" #include "third_party/WebKit/WebKit/chromium/public/WebAccessibilityObject.h" #include "third_party/WebKit/WebKit/chromium/public/WebCString.h" @@ -223,7 +220,7 @@ static const char* const kUnreachableWebDataURL = static const char* const kBackForwardNavigationScheme = "history"; // The string returned in DetectLanguage if we failed to detect the language. -static const char* const kUnknownLanguageCode = "unknown"; +static const char* const kUnknownLanguageCode = "und"; static void GetRedirectChain(WebDataSource* ds, std::vector<GURL>* result) { WebVector<WebURL> urls; @@ -3082,31 +3079,29 @@ std::string RenderView::DetectLanguage() { if (!webview() || is_loading_) return kUnknownLanguageCode; - std::string language = kUnknownLanguageCode; -#if defined(OS_WIN) // CLD is only available on Windows at this time. WebFrame* main_frame = webview()->mainFrame(); std::wstring contents; CaptureText(main_frame, &contents); - language = DetermineTextLanguage(contents); -#endif - - return language; + return DetermineTextLanguage(contents); } // static std::string RenderView::DetermineTextLanguage(const std::wstring& text) { std::string language = kUnknownLanguageCode; -#if defined(OS_WIN) // CLD is only available on Windows at this time. int num_languages = 0; bool is_reliable = false; + string16 input = WideToUTF16(text); Language cld_language = - DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable, + DetectLanguageOfUnicodeText(NULL, input.c_str(), true, &is_reliable, &num_languages, NULL); if (cld_language != NUM_LANGUAGES && cld_language != UNKNOWN_LANGUAGE && cld_language != TG_UNKNOWN_LANGUAGE) { - language = LanguageCodeISO639_1(cld_language); + // We should not use LanguageCode_ISO_639_1 because it does not cover all the + // languages CLD can detect. 
As a result, it'll return the invalid language + // code for tradtional Chinese among others. |LanguageCode| will go through + // ISO 639-1, ISO-639-2 and 'other' tables to do the 'right' thing. + language = LanguageCode(cld_language); } -#endif return language; } diff --git a/chrome/renderer/render_view.h b/chrome/renderer/render_view.h index 4d3a4e9..036cb0b 100644 --- a/chrome/renderer/render_view.h +++ b/chrome/renderer/render_view.h @@ -436,9 +436,15 @@ class RenderView : public RenderWidget, PageTranslator* page_translator() const { return page_translator_.get(); } - // Returns the ISO 639_1 language code of the current page - // (ex: en, fr, zh...). Returns 'unknown' if the language could not be - // determined. + // Returns the ISO 639 language code of the current page (e.g. en, fr, zh). + // If ISO 639-1 code is not available for the language, ISO 639-2 3-letter code + // will be returned (e.g. kha for Khasi and und for undtermined). For traditional + // Chinse, 'zh-TW' will be returned while for simplified Chinse, 'zh' will be + // returned. + // TODO(jungshik): Make it return 'he' (the correct ISO 639 code for Hebrew) + // instead of the obsolete 'iw'. Perhaps, it's also better to return 'zh-Hans' + // (or 'zh-CN') for Simplified Chinese instead of 'zh' to be aligned with + // 'zh-TW' for Traditional Chinse. std::string DetectLanguage(); protected: diff --git a/chrome/test/data/extensions/good/Extensions/behllobkkfkfnphdnhnkndlbkcpglgmj/1.0.0.0/french_sentence.html b/chrome/test/data/extensions/good/Extensions/behllobkkfkfnphdnhnkndlbkcpglgmj/1.0.0.0/french_sentence.html index 3d3c2e8..a7607f2 100644 --- a/chrome/test/data/extensions/good/Extensions/behllobkkfkfnphdnhnkndlbkcpglgmj/1.0.0.0/french_sentence.html +++ b/chrome/test/data/extensions/good/Extensions/behllobkkfkfnphdnhnkndlbkcpglgmj/1.0.0.0/french_sentence.html @@ -4,6 +4,7 @@ source code is governed by a BSD-style license that can be found in the LICENSE file. 
--> <html> +<meta charset="ISO-8859-1"> <body> <p> Ceci est une phrase complète est en français, rédigé en anglais puis traduits diff --git a/third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_scopedptr.h b/third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_scopedptr.h index 7f18238..650e578 100644 --- a/third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_scopedptr.h +++ b/third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_scopedptr.h @@ -5,13 +5,8 @@ #ifndef BAR_TOOLBAR_CLD_I18N_ENCODINGS_COMPACT_LANG_DET_WIN_CLD_SCOPEDPTR_H_ #define BAR_TOOLBAR_CLD_I18N_ENCODINGS_COMPACT_LANG_DET_WIN_CLD_SCOPEDPTR_H_ -#include <wincrypt.h> // to compile common/scopedptr.h -#include <wininet.h> // to compile common/scopedptr.h - -// This include has to be out of order to compile to compile common/scopedptr.h +// This include has to be out of order to compile common/scopedptr.h #include "bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_macros.h" -#include "bar/common/scopedlibrary.h" -#include "bar/common/scopedptr.h" #include "bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_scoped_ptr.h" #endif // BAR_TOOLBAR_CLD_I18N_ENCODINGS_COMPACT_LANG_DET_WIN_CLD_SCOPEDPTR_H_ diff --git a/third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_unicodetext.cc b/third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_unicodetext.cc index c1a4d95..5b0e67e9 100644 --- a/third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_unicodetext.cc +++ b/third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_unicodetext.cc @@ -4,94 +4,46 @@ #include "bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_unicodetext.h" -#include <tchar.h> -#include <windows.h> - +#include <string> #include <vector> // to compile bar/common/component.h #include "bar/toolbar/cld/i18n/encodings/compact_lang_det/compact_lang_det.h" -#include 
"bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_scopedptr.h" -#include "bar/toolbar/cld/i18n/encodings/compact_lang_det/win/normalizedunicodetext.h" +#include "base/string_util.h" +#include "unicode/normlzr.h" +#include "unicode/unistr.h" +#include "unicode/ustring.h" + +std::string NormalizeText(const UChar* text) { + // To avoid a copy, use the read-only aliasing ctor. + icu::UnicodeString source(1, text, -1); + icu::UnicodeString normalized; + UErrorCode status = U_ZERO_ERROR; + icu::Normalizer::normalize(source, UNORM_NFC, 0, normalized, status); + if (U_FAILURE(status)) + return std::string(); + normalized.toLower(); + std::string utf8; + // Internally, toUTF8String uses a 1kB stack buffer (which is not large enough + // for most web pages) and does pre-flighting followed by malloc for larger + // strings. We have to switch to obtaining the buffer with the maximum size + // (UTF-16 length * 3) without pre-flighting if necessary. + return normalized.toUTF8String(utf8); +} // Detects a language of the UTF-16 encoded zero-terminated text. // Returns: Language enum. Language DetectLanguageOfUnicodeText( const CompactLangDet::DetectionTables* detection_tables, - const WCHAR* text, bool is_plain_text, + const UChar* text, bool is_plain_text, bool* is_reliable, int* num_languages, - DWORD* error_code) { - if (!text || !num_languages) { - if (error_code) - *error_code = ERROR_INVALID_PARAMETER; - return NUM_LANGUAGES; - } - - // Normalize text first. We do not check the return value here since there - // is no meaningful recovery we can do in case of failure anyway. - // Since the vast majority of texts on the Internet is already normalized - // and languages which require normalization are easy to recognize by CLD - // anyway, we'll benefit more from trying to detect language in non-normalized - // text (and, with some probability, fail to recognize it) than to give up - // right away and return the unknown language here. 
- NormalizedUnicodeText nomalized_text; - nomalized_text.Normalize(NormalizationC, text); - - // Determine the size of the buffer required to store a lowercased text. - int lowercase_text_size = - ::LCMapString(NULL, LCMAP_LOWERCASE | LCMAP_LINGUISTIC_CASING, - nomalized_text.get(), -1, - NULL, 0); - if (!lowercase_text_size) { - if (error_code) - *error_code = ::GetLastError(); + int* error_code) { + if (!text || !num_languages) return NUM_LANGUAGES; - } - - scoped_array<WCHAR> lowercase_text(new WCHAR[lowercase_text_size]); - if (!lowercase_text.get()) - return NUM_LANGUAGES; - - // Covert text to lowercase. - int lowercasing_result = - ::LCMapString(NULL, LCMAP_LOWERCASE | LCMAP_LINGUISTIC_CASING, - nomalized_text.get(), -1, - lowercase_text.get(), lowercase_text_size); - if (!lowercasing_result) { - if (error_code) - *error_code = ::GetLastError(); + // Normalize text to NFC, lowercase and convert to UTF-8. + std::string utf8_encoded = NormalizeText(text); + if (utf8_encoded.empty()) return NUM_LANGUAGES; - } - - // Determine the size of the buffer required to covert text to UTF-8. - int utf8_encoded_buffer_size = - ::WideCharToMultiByte(CP_UTF8, 0, - lowercase_text.get(), -1, - NULL, 0, - NULL, NULL); - if (!utf8_encoded_buffer_size) { - if (error_code) - *error_code = ::GetLastError(); - return NUM_LANGUAGES; - } - - scoped_array<char> utf8_encoded_buffer( - new char[utf8_encoded_buffer_size]); - - // Convert text to UTF-8. - int utf8_encoding_result = - ::WideCharToMultiByte(CP_UTF8, 0, - lowercase_text.get(), -1, - utf8_encoded_buffer.get(), utf8_encoded_buffer_size, - NULL, NULL); - if (!utf8_encoding_result) { - if (error_code) - *error_code = ::GetLastError(); - return NUM_LANGUAGES; - } - - if (error_code) - *error_code = 0; // Engage core CLD library language detection. 
Language language3[3] = { @@ -107,8 +59,8 @@ Language DetectLanguageOfUnicodeText( // language3 array is always set according to the detection results and // is not affected by this heuristic. CompactLangDet::DetectLanguageSummary(detection_tables, - utf8_encoded_buffer.get(), - utf8_encoded_buffer_size, + utf8_encoded.c_str(), + utf8_encoded.length(), is_plain_text, language3, percent3, &text_bytes, is_reliable); diff --git a/third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_unicodetext.h b/third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_unicodetext.h index 7030691..c0d64aa 100644 --- a/third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_unicodetext.h +++ b/third_party/cld/bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_unicodetext.h @@ -5,9 +5,8 @@ #ifndef BAR_TOOLBAR_CLD_I18N_ENCODINGS_COMPACT_LANG_DET_WIN_CLD_UNICODETEXT_H_ #define BAR_TOOLBAR_CLD_I18N_ENCODINGS_COMPACT_LANG_DET_WIN_CLD_UNICODETEXT_H_ -#include <windows.h> - #include "bar/toolbar/cld/i18n/languages/public/languages.h" +#include "unicode/utypes.h" namespace CompactLangDet { struct DetectionTables; @@ -33,9 +32,9 @@ namespace CompactLangDet { // for details. Language DetectLanguageOfUnicodeText( const CompactLangDet::DetectionTables* detection_tables, - const WCHAR* text, bool is_plain_text, + const UChar* text, bool is_plain_text, bool* is_reliable, int* num_languages, - DWORD* error_code); + int* error_code); #endif // BAR_TOOLBAR_CLD_I18N_ENCODINGS_COMPACT_LANG_DET_WIN_CLD_UNICODETEXT_H_ diff --git a/third_party/cld/base/string_util.h b/third_party/cld/base/string_util.h index 365d1bf..7717e5b 100644 --- a/third_party/cld/base/string_util.h +++ b/third_party/cld/base/string_util.h @@ -11,12 +11,18 @@ namespace base {
+#ifdef WIN32
// Compare the two strings s1 and s2 without regard to case using
// the current locale; returns 0 if they are equal, 1 if s1 > s2, and -1 if
// s2 > s1 according to a lexicographic comparison.
inline int strcasecmp(const char* s1, const char* s2) {
return _stricmp(s1, s2);
}
+#else
+// Non-Windows variant: case-insensitive comparison of s1 and s2, forwarded to
+// the C library's strcasecmp. The call must be qualified with '::'; inside
+// namespace base an unqualified strcasecmp(s1, s2) resolves to this very
+// function and recurses without end.
+// NOTE(review): '::strcasecmp' needs a visible declaration (normally from
+// <strings.h>) -- confirm this header's includes provide it.
+inline int strcasecmp(const char* s1, const char* s2) {
+ return ::strcasecmp(s1, s2);
+}
+#endif
}
diff --git a/third_party/cld/cld.gyp b/third_party/cld/cld.gyp index 2f3f192..6913f2e 100644 --- a/third_party/cld/cld.gyp +++ b/third_party/cld/cld.gyp @@ -3,119 +3,133 @@ # found in the LICENSE file. { - 'conditions': [ - ['OS=="win"', { - 'targets': [ - { - 'target_name': 'cld', - 'type': '<(library)', - 'include_dirs': [ - '.', - ], - 'msvs_disabled_warnings': [4005, 4006, 4018, 4244, 4309, 4800], - 'defines': [ - 'CLD_WINDOWS', - ], - 'sources': [ - 'bar/common/scopedlibrary.h', - 'bar/common/scopedptr.h', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/cldutil.cc', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/cldutil.h', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/cldutil_dbg.h', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/cldutil_dbg_empty.cc', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/compact_lang_det.cc', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/compact_lang_det.h', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/compact_lang_det_impl.cc', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/compact_lang_det_impl.h', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/ext_lang_enc.cc', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/ext_lang_enc.h', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/getonescriptspan.cc', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/getonescriptspan.h', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/letterscript_enum.cc', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/letterscript_enum.h', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/subsetsequence.cc', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/subsetsequence.h', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/tote.cc', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/tote.h', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/utf8propjustletter.h', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/utf8propletterscriptnum.h', - 
'bar/toolbar/cld/i18n/encodings/compact_lang_det/utf8scannotjustletterspecial.h', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/generated/compact_lang_det_generated_cjkbis_0.cc', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/generated/compact_lang_det_generated_ctjkvz.cc', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/generated/compact_lang_det_generated_longwords8_0.cc', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/generated/compact_lang_det_generated_meanscore.h', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/generated/compact_lang_det_generated_quads_128.cc', - # For now using the 128 bytes detection in order to save hundreds of KBs on the final package. - # 'bar/toolbar/cld/i18n/encodings/compact_lang_det/generated/compact_lang_det_generated_quads_256.cc', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_basictypes.h', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_commandlineflags.h', - # We use the static table at this point, so we don't need to compile the following files: - #'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_dynamicstate.h', - #'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_dynamicstate.cc', - #'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_loadpolicy.cc', - #'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_loadpolicy.h', - #'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_loadpolicyinterface.h', - #'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_resourceids.h', - #'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_service.h', - #'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_service.cc', - #'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_serviceinterface.h', - #'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_tables.cc', - #'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_tables.h', - #'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/resourceinmemory.cc', - 
#'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/resourceinmemory.h', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_google.h', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_htmlutils.h', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_htmlutils_windows.cc', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_logging.h', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_macros.h', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_scoped_ptr.h', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_scopedptr.h', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_strtoint.h', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_unicodetext.cc', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_unicodetext.h', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_unilib.h', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_unilib_windows.cc', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_utf.h', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_utf8statetable.cc', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_utf8statetable.h', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_utf8utils.h', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_utf8utils_windows.cc', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/normalizedunicodetext.cc', - 'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/normalizedunicodetext.h', - 'bar/toolbar/cld/i18n/encodings/internal/encodings.cc', - 'bar/toolbar/cld/i18n/encodings/proto/encodings.pb.h', - 'bar/toolbar/cld/i18n/encodings/public/encodings.h', - 'bar/toolbar/cld/i18n/languages/internal/languages.cc', - 'bar/toolbar/cld/i18n/languages/proto/languages.pb.h', - 'bar/toolbar/cld/i18n/languages/public/languages.h', - 'base/basictypes.h', - 'base/build_config.h', - 'base/casts.h', - 'base/commandlineflags.h', - 'base/global_strip_options.h', - 'base/logging.h', - 
'base/macros.h', - 'base/port.h', - 'base/crash.h', - 'base/dynamic_annotations.h', - 'base/scoped_ptr.h', - 'base/stl_decl_msvc.h', - 'base/log_severity.h', - 'base/strtoint.h', - 'base/vlog_is_on.h', - 'base/string_util.h', - 'base/type_traits.h', - 'base/template_util.h', - ], - 'direct_dependent_settings': { - 'defines': [ - 'CLD_WINDOWS', - 'COMPILER_MSVC', - ], - }, - },], + 'targets': [ + { + 'target_name': 'cld', + 'type': '<(library)', + 'dependencies': [ + '../icu/icu.gyp:icuuc', + ], + 'include_dirs': [ + '.', + ], + 'defines': [ + 'CLD_WINDOWS', + ], + 'sources': [ + 'bar/common/scopedptr.h', + 'bar/toolbar/cld/i18n/encodings/compact_lang_det/cldutil.cc', + 'bar/toolbar/cld/i18n/encodings/compact_lang_det/cldutil.h', + 'bar/toolbar/cld/i18n/encodings/compact_lang_det/cldutil_dbg.h', + 'bar/toolbar/cld/i18n/encodings/compact_lang_det/cldutil_dbg_empty.cc', + 'bar/toolbar/cld/i18n/encodings/compact_lang_det/compact_lang_det.cc', + 'bar/toolbar/cld/i18n/encodings/compact_lang_det/compact_lang_det.h', + 'bar/toolbar/cld/i18n/encodings/compact_lang_det/compact_lang_det_impl.cc', + 'bar/toolbar/cld/i18n/encodings/compact_lang_det/compact_lang_det_impl.h', + 'bar/toolbar/cld/i18n/encodings/compact_lang_det/ext_lang_enc.cc', + 'bar/toolbar/cld/i18n/encodings/compact_lang_det/ext_lang_enc.h', + 'bar/toolbar/cld/i18n/encodings/compact_lang_det/getonescriptspan.cc', + 'bar/toolbar/cld/i18n/encodings/compact_lang_det/getonescriptspan.h', + 'bar/toolbar/cld/i18n/encodings/compact_lang_det/letterscript_enum.cc', + 'bar/toolbar/cld/i18n/encodings/compact_lang_det/letterscript_enum.h', + 'bar/toolbar/cld/i18n/encodings/compact_lang_det/subsetsequence.cc', + 'bar/toolbar/cld/i18n/encodings/compact_lang_det/subsetsequence.h', + 'bar/toolbar/cld/i18n/encodings/compact_lang_det/tote.cc', + 'bar/toolbar/cld/i18n/encodings/compact_lang_det/tote.h', + 'bar/toolbar/cld/i18n/encodings/compact_lang_det/utf8propjustletter.h', + 
'bar/toolbar/cld/i18n/encodings/compact_lang_det/utf8propletterscriptnum.h', + 'bar/toolbar/cld/i18n/encodings/compact_lang_det/utf8scannotjustletterspecial.h', + 'bar/toolbar/cld/i18n/encodings/compact_lang_det/generated/compact_lang_det_generated_cjkbis_0.cc', + 'bar/toolbar/cld/i18n/encodings/compact_lang_det/generated/compact_lang_det_generated_ctjkvz.cc', + 'bar/toolbar/cld/i18n/encodings/compact_lang_det/generated/compact_lang_det_generated_longwords8_0.cc', + 'bar/toolbar/cld/i18n/encodings/compact_lang_det/generated/compact_lang_det_generated_meanscore.h', + 'bar/toolbar/cld/i18n/encodings/compact_lang_det/generated/compact_lang_det_generated_quads_128.cc', + # For now using the 128 bytes detection in order to save hundreds of KBs on the final package. + # 'bar/toolbar/cld/i18n/encodings/compact_lang_det/generated/compact_lang_det_generated_quads_256.cc', + 'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_basictypes.h', + 'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_commandlineflags.h', + # We use the static table at this point, so we don't need to compile the following files: + #'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_dynamicstate.h', + #'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_dynamicstate.cc', + #'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_loadpolicy.cc', + #'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_loadpolicy.h', + #'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_loadpolicyinterface.h', + #'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_resourceids.h', + #'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_service.h', + #'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_service.cc', + #'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_serviceinterface.h', + #'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_tables.cc', + #'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_tables.h', + 
#'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/resourceinmemory.cc', + #'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/resourceinmemory.h', + 'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_google.h', + 'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_htmlutils.h', + 'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_htmlutils_windows.cc', + 'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_logging.h', + 'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_macros.h', + # None of files we build require these two headers. + #'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_scoped_ptr.h', + #'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_scopedptr.h', + 'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_strtoint.h', + 'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_unicodetext.cc', + 'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_unicodetext.h', + 'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_unilib.h', + 'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_unilib_windows.cc', + 'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_utf.h', + 'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_utf8statetable.cc', + 'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_utf8statetable.h', + 'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_utf8utils.h', + 'bar/toolbar/cld/i18n/encodings/compact_lang_det/win/cld_utf8utils_windows.cc', + 'bar/toolbar/cld/i18n/encodings/internal/encodings.cc', + 'bar/toolbar/cld/i18n/encodings/proto/encodings.pb.h', + 'bar/toolbar/cld/i18n/encodings/public/encodings.h', + 'bar/toolbar/cld/i18n/languages/internal/languages.cc', + 'bar/toolbar/cld/i18n/languages/proto/languages.pb.h', + 'bar/toolbar/cld/i18n/languages/public/languages.h', + 'base/basictypes.h', + 'base/build_config.h', + 'base/casts.h', + 'base/commandlineflags.h', + 'base/global_strip_options.h', + 'base/logging.h', + 'base/macros.h', + 'base/port.h', + 
'base/crash.h', + 'base/dynamic_annotations.h', + 'base/scoped_ptr.h', + 'base/stl_decl_msvc.h', + 'base/log_severity.h', + 'base/strtoint.h', + 'base/vlog_is_on.h', + 'base/string_util.h', + 'base/type_traits.h', + 'base/template_util.h', + ], + 'direct_dependent_settings': { + 'defines': [ + 'CLD_WINDOWS', + ], }, - ], + 'conditions': [ + ['OS=="win"', { + 'direct_dependent_settings': { + 'defines': [ + 'COMPILER_MSVC', + ], + }, + 'msvs_disabled_warnings': [4005, 4006, 4018, 4244, 4309, 4800], + }, + ], + ['OS!="win"', { + 'direct_dependent_settings': { + 'defines': [ + 'COMPILER_GCC', + ], + }, + }, + ], + ], + }, ], } |