summary refs log tree commit diff stats
path: root/chrome/renderer
diff options
context:
space:
mode:
authorjcivelli@chromium.org <jcivelli@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-08-02 18:40:08 +0000
committerjcivelli@chromium.org <jcivelli@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-08-02 18:40:08 +0000
commita955868874c52f54e5ad1431cb0d377865257377 (patch)
treea66f7cdaae9bb44714b8d16cf980b324b6069d15 /chrome/renderer
parent969a777eb646c394744e504f943778f34e624694 (diff)
downloadchromium_src-a955868874c52f54e5ad1431cb0d377865257377.zip
chromium_src-a955868874c52f54e5ad1431cb0d377865257377.tar.gz
chromium_src-a955868874c52f54e5ad1431cb0d377865257377.tar.bz2
Fix language detection with short text pages.
Changed the CLD wrapper function to return the number of bytes used for language detection, and used that count in the render view to ignore detection results for short texts. BUG=45156 TEST=Visit www.voila.fr; an infobar should show reporting that the page is in French. Visit http://jdvhotels.com/calendar.html?datetime=1274857200000&id=0; no translate infobar should show. Review URL: http://codereview.chromium.org/3038018 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@54568 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/renderer')
-rw-r--r--chrome/renderer/render_view.cc13
1 files changed, 6 insertions, 7 deletions
diff --git a/chrome/renderer/render_view.cc b/chrome/renderer/render_view.cc
index c2db5d5..5bdf140 100644
--- a/chrome/renderer/render_view.cc
+++ b/chrome/renderer/render_view.cc
@@ -371,18 +371,17 @@ static bool CrossesExtensionExtents(WebFrame* frame, const GURL& new_url) {
// Note this only works on Windows at this time. It always returns 'unknown'
// on other platforms.
static std::string DetermineTextLanguage(const string16& text) {
- // Text with less than 100 bytes will probably not provide good results.
- // Report it as unknown language.
- if (text.length() < 100)
- return chrome::kUnknownLanguageCode;
-
std::string language = chrome::kUnknownLanguageCode;
int num_languages = 0;
+ int text_bytes = 0;
bool is_reliable = false;
Language cld_language =
DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable,
- &num_languages, NULL);
- if (is_reliable && cld_language != NUM_LANGUAGES &&
+ &num_languages, NULL, &text_bytes);
+ // We don't trust the result if the CLD reports that the detection is not
+ // reliable, or if the actual text used to detect the language was less than
+ // 100 bytes (short texts can often lead to wrong results).
+ if (is_reliable && text_bytes >= 100 && cld_language != NUM_LANGUAGES &&
cld_language != UNKNOWN_LANGUAGE && cld_language != TG_UNKNOWN_LANGUAGE) {
// We should not use LanguageCode_ISO_639_1 because it does not cover all
// the languages CLD can detect. As a result, it'll return the invalid