summary | refs | log | tree | commit | diff | stats
path: root/chrome/renderer/render_view.cc
diff options
context:
space:
mode:
author: brettw@chromium.org <brettw@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2010-06-11 21:12:36 +0000
committer: brettw@chromium.org <brettw@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2010-06-11 21:12:36 +0000
commit: e510620c6b1bd4eff20b101c0cdb63166e0b117e (patch)
tree: 97d58f7932df064b94fd928ed9978731054d6253 /chrome/renderer/render_view.cc
parent: 3acba10eb98cd7c4e877f78537353d866b59e62f (diff)
download: chromium_src-e510620c6b1bd4eff20b101c0cdb63166e0b117e.zip
chromium_src-e510620c6b1bd4eff20b101c0cdb63166e0b117e.tar.gz
chromium_src-e510620c6b1bd4eff20b101c0cdb63166e0b117e.tar.bz2
Convert page contents grabbing from wide to UTF-16. The current code is a bit
silly because it captures the text in UTF-16, converts it to wide, sends it to
the browser, and then converts it to UTF-8 for FTS.

TEST=none
BUG=none
Review URL: http://codereview.chromium.org/2714012
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@49594 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'chrome/renderer/render_view.cc')
-rwxr-xr-x chrome/renderer/render_view.cc 13
1 files changed, 6 insertions, 7 deletions
diff --git a/chrome/renderer/render_view.cc b/chrome/renderer/render_view.cc
index eaf7651..f4c952c 100755
--- a/chrome/renderer/render_view.cc
+++ b/chrome/renderer/render_view.cc
@@ -355,7 +355,7 @@ static bool CrossesExtensionExtents(WebFrame* frame, const GURL& new_url) {
//
// Note this only works on Windows at this time. It always returns 'unknown'
// on other platforms.
-static std::string DetermineTextLanguage(const std::wstring& text) {
+static std::string DetermineTextLanguage(const string16& text) {
// Text with less than 100 bytes will probably not provide good results.
// Report it as unknown language.
if (text.length() < 100)
@@ -364,9 +364,8 @@ static std::string DetermineTextLanguage(const std::wstring& text) {
std::string language = RenderView::kUnknownLanguageCode;
int num_languages = 0;
bool is_reliable = false;
- string16 input = WideToUTF16(text);
Language cld_language =
- DetectLanguageOfUnicodeText(NULL, input.c_str(), true, &is_reliable,
+ DetectLanguageOfUnicodeText(NULL, text.c_str(), true, &is_reliable,
&num_languages, NULL);
if (is_reliable && cld_language != NUM_LANGUAGES &&
cld_language != UNKNOWN_LANGUAGE && cld_language != TG_UNKNOWN_LANGUAGE) {
@@ -822,7 +821,7 @@ void RenderView::CapturePageInfo(int load_id, bool preliminary_capture) {
return;
// Retrieve the frame's full text.
- std::wstring contents;
+ string16 contents;
CaptureText(main_frame, &contents);
if (contents.size()) {
base::TimeTicks begin_time = base::TimeTicks::Now();
@@ -839,7 +838,7 @@ void RenderView::CapturePageInfo(int load_id, bool preliminary_capture) {
OnCaptureThumbnail();
}
-void RenderView::CaptureText(WebFrame* frame, std::wstring* contents) {
+void RenderView::CaptureText(WebFrame* frame, string16* contents) {
contents->clear();
if (!frame)
return;
@@ -849,7 +848,7 @@ void RenderView::CaptureText(WebFrame* frame, std::wstring* contents) {
#endif
// get the contents of the frame
- *contents = UTF16ToWideHack(frame->contentAsText(kMaxIndexChars));
+ *contents = frame->contentAsText(kMaxIndexChars);
#ifdef TIME_TEXT_RETRIEVAL
double end = time_util::GetHighResolutionTimeNow();
@@ -863,7 +862,7 @@ void RenderView::CaptureText(WebFrame* frame, std::wstring* contents) {
// partial word indexed at the end that might have been clipped. Therefore,
// terminate the string at the last space to ensure no words are clipped.
if (contents->size() == kMaxIndexChars) {
- size_t last_space_index = contents->find_last_of(kWhitespaceWide);
+ size_t last_space_index = contents->find_last_of(kWhitespaceUTF16);
if (last_space_index == std::wstring::npos)
return; // don't index if we got a huge block of text with no spaces
contents->resize(last_space_index);