summaryrefslogtreecommitdiffstats
path: root/base
diff options
context:
space:
mode:
authorphajdan.jr@chromium.org <phajdan.jr@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-05-11 09:19:30 +0000
committerphajdan.jr@chromium.org <phajdan.jr@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2010-05-11 09:19:30 +0000
commit193c35145799c33bde24a470f460587954d70aba (patch)
tree330c97462719bb266940cec9cd40ea70531d8500 /base
parent587f4ff6420df90d4e6160aeb78918fcf55e3599 (diff)
downloadchromium_src-193c35145799c33bde24a470f460587954d70aba.zip
chromium_src-193c35145799c33bde24a470f460587954d70aba.tar.gz
chromium_src-193c35145799c33bde24a470f460587954d70aba.tar.bz2
FTP: fix navigating to files listed under non-ASCII characters
We need to convert the file name back to server encoding.
BUG=38016
TEST=see bug
Review URL: http://codereview.chromium.org/1857002
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@46900 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'base')
-rw-r--r--base/base.gyp2
-rw-r--r--base/i18n/icu_encoding_detection.cc38
-rw-r--r--base/i18n/icu_encoding_detection.h19
3 files changed, 59 insertions, 0 deletions
diff --git a/base/base.gyp b/base/base.gyp
index 88f8702..460319b 100644
--- a/base/base.gyp
+++ b/base/base.gyp
@@ -33,6 +33,8 @@
'sources': [
'i18n/file_util_icu.cc',
'i18n/file_util_icu.h',
+ 'i18n/icu_encoding_detection.cc',
+ 'i18n/icu_encoding_detection.h',
'i18n/icu_string_conversions.cc',
'i18n/icu_string_conversions.h',
'i18n/icu_util.cc',
diff --git a/base/i18n/icu_encoding_detection.cc b/base/i18n/icu_encoding_detection.cc
new file mode 100644
index 0000000..55785c5
--- /dev/null
+++ b/base/i18n/icu_encoding_detection.cc
@@ -0,0 +1,38 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/i18n/icu_encoding_detection.h"
+
+#include "base/string_util.h"
+#include "unicode/ucsdet.h"
+
+namespace base {
+
+// Detects the encoding of |text| with ICU's charset detector and stores the
+// ICU encoding name in |encoding|. ASCII-only input short-circuits to an empty
+// |encoding| without invoking ICU. Returns false, leaving |encoding|
+// untouched, when ICU reports an error or yields no encoding name.
+// TODO(jungshik): We can apply more heuristics here (e.g. using various hints
+// like TLD, the UI language/default encoding of a client, etc).
+bool DetectEncoding(const std::string& text, std::string* encoding) {
+  if (IsStringASCII(text)) {
+    encoding->clear();
+    return true;
+  }
+
+  UErrorCode status = U_ZERO_ERROR;
+  UCharsetDetector* detector = ucsdet_open(&status);
+  ucsdet_setText(detector, text.data(), static_cast<int32_t>(text.length()),
+                 &status);
+  // TODO(jungshik): Should we check the quality of the match? A rather
+  // arbitrary number is assigned by ICU and it's hard to come up with
+  // a lower limit.
+  const UCharsetMatch* match = ucsdet_detect(detector, &status);
+  const char* detected_encoding = ucsdet_getName(match, &status);
+
+  // ICU documents the match as owned by |detector| and the name as owned by
+  // the match, so the name has to be copied out *before* the detector is
+  // closed. Also guard against a NULL name rather than constructing a
+  // std::string from it.
+  const bool detected = U_SUCCESS(status) && detected_encoding != NULL;
+  if (detected)
+    *encoding = detected_encoding;
+
+  ucsdet_close(detector);
+  return detected;
+}
+
+} // namespace base
diff --git a/base/i18n/icu_encoding_detection.h b/base/i18n/icu_encoding_detection.h
new file mode 100644
index 0000000..0d8e5d8
--- /dev/null
+++ b/base/i18n/icu_encoding_detection.h
@@ -0,0 +1,19 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef BASE_I18N_ICU_ENCODING_DETECTION_H_
+#define BASE_I18N_ICU_ENCODING_DETECTION_H_
+
+#include <string>
+
+namespace base {
+
+// Detect encoding of |text| and put the name of encoding (as returned by ICU)
+// in |encoding|. For ASCII texts |encoding| will be set to an empty string.
+// Returns true on success. Returns false if ICU reports an error during
+// detection; in that case |encoding| is not modified.
+bool DetectEncoding(const std::string& text, std::string* encoding);
+
+} // namespace base
+
+#endif // BASE_I18N_ICU_ENCODING_DETECTION_H_