From 1d6be41bbbd90da118ba9a6295b2164ce97aca93 Mon Sep 17 00:00:00 2001
From: "estade@chromium.org"
Date: Wed, 2 Dec 2009 18:31:14 +0000
Subject: Linux: when reading html from clipboard, interpret BOM to mean that the encoding is UTF-16. Otherwise, continue assuming it's utf-8.

From firefox source:

/*
 * "text/html" can be encoded UCS2. It is recommended that
 * documents transmitted as UCS2 always begin with a ZERO-WIDTH
 * NON-BREAKING SPACE character (hexadecimal FEFF, also called
 * Byte Order Mark (BOM)). Adding BOM can help other app to
 * detect mozilla use UCS2 encoding when copy-paste.
 */

BUG=29145
Review URL: http://codereview.chromium.org/455030

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@33585 0039d316-1c4b-4281-b951-d872f2087c98
---
 app/clipboard/clipboard_linux.cc | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

(limited to 'app/clipboard')

diff --git a/app/clipboard/clipboard_linux.cc b/app/clipboard/clipboard_linux.cc
index f06b9b2..dba93f0 100644
--- a/app/clipboard/clipboard_linux.cc
+++ b/app/clipboard/clipboard_linux.cc
@@ -326,7 +326,16 @@ void Clipboard::ReadHTML(Clipboard::Buffer buffer, string16* markup,
   if (!data)
     return;
 
-  UTF8ToUTF16(reinterpret_cast<char*>(data->data), data->length, markup);
+  // If the data starts with 0xFEFF, i.e., Byte Order Mark, assume it is
+  // UTF-16, otherwise assume UTF-8.
+  if (data->length >= 2 &&
+      reinterpret_cast<uint16_t*>(data->data)[0] == 0xFEFF) {
+    markup->assign(reinterpret_cast<uint16_t*>(data->data) + 1,
+                   (data->length / 2) - 1);
+  } else {
+    UTF8ToUTF16(reinterpret_cast<char*>(data->data), data->length, markup);
+  }
+
   gtk_selection_data_free(data);
 }
 
-- 
cgit v1.1