diff options
author | brettw@chromium.org <brettw@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-06-11 21:12:36 +0000 |
---|---|---|
committer | brettw@chromium.org <brettw@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-06-11 21:12:36 +0000 |
commit | e510620c6b1bd4eff20b101c0cdb63166e0b117e (patch) | |
tree | 97d58f7932df064b94fd928ed9978731054d6253 /chrome/renderer/render_view.cc | |
parent | 3acba10eb98cd7c4e877f78537353d866b59e62f (diff) | |
download | chromium_src-e510620c6b1bd4eff20b101c0cdb63166e0b117e.zip chromium_src-e510620c6b1bd4eff20b101c0cdb63166e0b117e.tar.gz chromium_src-e510620c6b1bd4eff20b101c0cdb63166e0b117e.tar.bz2 |
Convert page-contents grabbing from wide strings to UTF-16. The previous code
was a bit silly: it captured the text in UTF-16, converted it to wide, sent it
to the browser, and then converted it to UTF-8 for full-text search (FTS).
TEST=none
BUG=none
Review URL: http://codereview.chromium.org/2714012
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@49594 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/renderer/render_view.cc')
-rwxr-xr-x | chrome/renderer/render_view.cc | 13 |
1 files changed, 6 insertions, 7 deletions
diff --git a/chrome/renderer/render_view.cc b/chrome/renderer/render_view.cc index eaf7651..f4c952c 100755 --- a/chrome/renderer/render_view.cc +++ b/chrome/renderer/render_view.cc @@ -355,7 +355,7 @@ static bool CrossesExtensionExtents(WebFrame* frame, const GURL& new_url) { // // Note this only works on Windows at this time. It always returns 'unknown' // on other platforms. -static std::string DetermineTextLanguage(const std::wstring& text) { +static std::string DetermineTextLanguage(const string16& text) { // Text with less than 100 bytes will probably not provide good results. // Report it as unknown language. if (text.length() < 100) @@ -364,9 +364,8 @@ static std::string DetermineTextLanguage(const std::wstring& text) { std::string language = RenderView::kUnknownLanguageCode; int num_languages = 0; bool is_reliable = false; - string16 input = WideToUTF16(text); Language cld_language = - DetectLanguageOfUnicodeText(NULL, input.c_str(), true, &is_reliable, + DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable, &num_languages, NULL); if (is_reliable && cld_language != NUM_LANGUAGES && cld_language != UNKNOWN_LANGUAGE && cld_language != TG_UNKNOWN_LANGUAGE) { @@ -822,7 +821,7 @@ void RenderView::CapturePageInfo(int load_id, bool preliminary_capture) { return; // Retrieve the frame's full text. 
- std::wstring contents; + string16 contents; CaptureText(main_frame, &contents); if (contents.size()) { base::TimeTicks begin_time = base::TimeTicks::Now(); @@ -839,7 +838,7 @@ void RenderView::CapturePageInfo(int load_id, bool preliminary_capture) { OnCaptureThumbnail(); } -void RenderView::CaptureText(WebFrame* frame, std::wstring* contents) { +void RenderView::CaptureText(WebFrame* frame, string16* contents) { contents->clear(); if (!frame) return; @@ -849,7 +848,7 @@ void RenderView::CaptureText(WebFrame* frame, std::wstring* contents) { #endif // get the contents of the frame - *contents = UTF16ToWideHack(frame->contentAsText(kMaxIndexChars)); + *contents = frame->contentAsText(kMaxIndexChars); #ifdef TIME_TEXT_RETRIEVAL double end = time_util::GetHighResolutionTimeNow(); @@ -863,7 +862,7 @@ void RenderView::CaptureText(WebFrame* frame, std::wstring* contents) { // partial word indexed at the end that might have been clipped. Therefore, // terminate the string at the last space to ensure no words are clipped. if (contents->size() == kMaxIndexChars) { - size_t last_space_index = contents->find_last_of(kWhitespaceWide); + size_t last_space_index = contents->find_last_of(kWhitespaceUTF16); if (last_space_index == std::wstring::npos) return; // don't index if we got a huge block of text with no spaces contents->resize(last_space_index); |