From 1d6be41bbbd90da118ba9a6295b2164ce97aca93 Mon Sep 17 00:00:00 2001
From: "estade@chromium.org"
 <estade@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>
Date: Wed, 2 Dec 2009 18:31:14 +0000
Subject: Linux: when reading HTML from the clipboard, interpret a leading BOM
 to mean that the encoding is UTF-16. Otherwise, continue assuming it's UTF-8.

From the Firefox source:

        /*
         * "text/html" can be encoded UCS2. It is recommended that
         * documents transmitted as UCS2 always begin with a ZERO-WIDTH
         * NON-BREAKING SPACE character (hexadecimal FEFF, also called
         * Byte Order Mark (BOM)). Adding BOM can help other app to
         * detect mozilla use UCS2 encoding when copy-paste.
         */

BUG=29145

Review URL: http://codereview.chromium.org/455030

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@33585 0039d316-1c4b-4281-b951-d872f2087c98
---
 app/clipboard/clipboard_linux.cc | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'app/clipboard')

diff --git a/app/clipboard/clipboard_linux.cc b/app/clipboard/clipboard_linux.cc
index f06b9b2..dba93f0 100644
--- a/app/clipboard/clipboard_linux.cc
+++ b/app/clipboard/clipboard_linux.cc
@@ -326,7 +326,16 @@ void Clipboard::ReadHTML(Clipboard::Buffer buffer, string16* markup,
   if (!data)
     return;
 
-  UTF8ToUTF16(reinterpret_cast<char*>(data->data), data->length, markup);
+  // If the data starts with 0xFEFF, i.e., Byte Order Mark, assume it is
+  // UTF-16, otherwise assume UTF-8.
+  if (data->length >= 2 &&
+      reinterpret_cast<uint16_t*>(data->data)[0] == 0xFEFF) {
+    markup->assign(reinterpret_cast<uint16_t*>(data->data) + 1,
+                   (data->length / 2) - 1);
+  } else {
+    UTF8ToUTF16(reinterpret_cast<char*>(data->data), data->length, markup);
+  }
+
   gtk_selection_data_free(data);
 }
 
-- 
cgit v1.1