Convert page contents grabbing from wide to UTF-16.

The current code is a bit silly because it would capture the text in UTF-16, then convert it to wide, send it to the browser, and then convert it to UTF-8 for FTS.

TEST=none
BUG=none
Review URL: http://codereview.chromium.org/2714012
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@49594 0039d316-1c4b-4281-b951-d872f2087c98
author: brettw@chromium.org <brettw@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2010-06-11 21:12:36 +0000
committer: brettw@chromium.org <brettw@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2010-06-11 21:12:36 +0000
commit: e510620c6b1bd4eff20b101c0cdb63166e0b117e (patch)
tree: 97d58f7932df064b94fd928ed9978731054d6253 /chrome/renderer/render_view.cc
parent: 3acba10eb98cd7c4e877f78537353d866b59e62f (diff)
download: chromium_src-e510620c6b1bd4eff20b101c0cdb63166e0b117e.zip
chromium_src-e510620c6b1bd4eff20b101c0cdb63166e0b117e.tar.gz
chromium_src-e510620c6b1bd4eff20b101c0cdb63166e0b117e.tar.bz2
1 files changed, 6 insertions, 7 deletions
diff --git a/chrome/renderer/render_view.cc b/chrome/renderer/render_view.cc
index eaf7651..f4c952c 100755
--- a/chrome/renderer/render_view.cc
+++ b/chrome/renderer/render_view.cc
@@ -355,7 +355,7 @@ static bool CrossesExtensionExtents(WebFrame* frame, const GURL& new_url) {
 //
 // Note this only works on Windows at this time.  It always returns 'unknown'
 // on other platforms.
-static std::string DetermineTextLanguage(const std::wstring& text) {
+static std::string DetermineTextLanguage(const string16& text) {
   // Text with less than 100 bytes will probably not provide good results.
   // Report it as unknown language.
   if (text.length() < 100)
@@ -364,9 +364,8 @@ static std::string DetermineTextLanguage(const std::wstring& text) {
   std::string language = RenderView::kUnknownLanguageCode;
   int num_languages = 0;
   bool is_reliable = false;
-  string16 input = WideToUTF16(text);
   Language cld_language =
-      DetectLanguageOfUnicodeText(NULL, input.c_str(), true, &is_reliable,
+      DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable,
                                   &num_languages, NULL);
   if (is_reliable && cld_language != NUM_LANGUAGES &&
       cld_language != UNKNOWN_LANGUAGE && cld_language != TG_UNKNOWN_LANGUAGE) {
@@ -822,7 +821,7 @@ void RenderView::CapturePageInfo(int load_id, bool preliminary_capture) {
     return;
 
   // Retrieve the frame's full text.
-  std::wstring contents;
+  string16 contents;
   CaptureText(main_frame, &contents);
   if (contents.size()) {
     base::TimeTicks begin_time = base::TimeTicks::Now();
@@ -839,7 +838,7 @@ void RenderView::CapturePageInfo(int load_id, bool preliminary_capture) {
   OnCaptureThumbnail();
 }
 
-void RenderView::CaptureText(WebFrame* frame, std::wstring* contents) {
+void RenderView::CaptureText(WebFrame* frame, string16* contents) {
   contents->clear();
   if (!frame)
     return;
@@ -849,7 +848,7 @@ void RenderView::CaptureText(WebFrame* frame, std::wstring* contents) {
 #endif
 
   // get the contents of the frame
-  *contents = UTF16ToWideHack(frame->contentAsText(kMaxIndexChars));
+  *contents = frame->contentAsText(kMaxIndexChars);
 
 #ifdef TIME_TEXT_RETRIEVAL
   double end = time_util::GetHighResolutionTimeNow();
@@ -863,7 +862,7 @@ void RenderView::CaptureText(WebFrame* frame, std::wstring* contents) {
   // partial word indexed at the end that might have been clipped. Therefore,
   // terminate the string at the last space to ensure no words are clipped.
   if (contents->size() == kMaxIndexChars) {
-    size_t last_space_index = contents->find_last_of(kWhitespaceWide);
+    size_t last_space_index = contents->find_last_of(kWhitespaceUTF16);
     if (last_space_index == std::wstring::npos)
       return;  // don't index if we got a huge block of text with no spaces
     contents->resize(last_space_index);
author	brettw@chromium.org <brettw@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2010-06-11 21:12:36 +0000
committer	brettw@chromium.org <brettw@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2010-06-11 21:12:36 +0000
commit	e510620c6b1bd4eff20b101c0cdb63166e0b117e (patch)
tree	97d58f7932df064b94fd928ed9978731054d6253 /chrome/renderer/render_view.cc
parent	3acba10eb98cd7c4e877f78537353d866b59e62f (diff)
download	chromium_src-e510620c6b1bd4eff20b101c0cdb63166e0b117e.zip chromium_src-e510620c6b1bd4eff20b101c0cdb63166e0b117e.tar.gz chromium_src-e510620c6b1bd4eff20b101c0cdb63166e0b117e.tar.bz2