diff options
author | phajdan.jr@chromium.org <phajdan.jr@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-05-11 09:19:30 +0000 |
---|---|---|
committer | phajdan.jr@chromium.org <phajdan.jr@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-05-11 09:19:30 +0000 |
commit | 193c35145799c33bde24a470f460587954d70aba (patch) | |
tree | 330c97462719bb266940cec9cd40ea70531d8500 /base | |
parent | 587f4ff6420df90d4e6160aeb78918fcf55e3599 (diff) | |
download | chromium_src-193c35145799c33bde24a470f460587954d70aba.zip chromium_src-193c35145799c33bde24a470f460587954d70aba.tar.gz chromium_src-193c35145799c33bde24a470f460587954d70aba.tar.bz2 |
FTP: fix navigating to files listed under non-ASCII characters
We need to convert the file name back to server encoding.
BUG=38016
TEST=see bug
Review URL: http://codereview.chromium.org/1857002
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@46900 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'base')
-rw-r--r-- | base/base.gyp | 2 | ||||
-rw-r--r-- | base/i18n/icu_encoding_detection.cc | 38 | ||||
-rw-r--r-- | base/i18n/icu_encoding_detection.h | 19 |
3 files changed, 59 insertions, 0 deletions
diff --git a/base/base.gyp b/base/base.gyp
index 88f8702..460319b 100644
--- a/base/base.gyp
+++ b/base/base.gyp
@@ -33,6 +33,8 @@
       'sources': [
         'i18n/file_util_icu.cc',
         'i18n/file_util_icu.h',
+        'i18n/icu_encoding_detection.cc',
+        'i18n/icu_encoding_detection.h',
         'i18n/icu_string_conversions.cc',
         'i18n/icu_string_conversions.h',
         'i18n/icu_util.cc',
diff --git a/base/i18n/icu_encoding_detection.cc b/base/i18n/icu_encoding_detection.cc
new file mode 100644
index 0000000..55785c5
--- /dev/null
+++ b/base/i18n/icu_encoding_detection.cc
@@ -0,0 +1,38 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "base/i18n/icu_encoding_detection.h"
+
+#include "base/string_util.h"
+#include "unicode/ucsdet.h"
+
+namespace base {
+
+// TODO(jungshik): We can apply more heuristics here (e.g. using various hints
+// like TLD, the UI language/default encoding of a client, etc).
+bool DetectEncoding(const std::string& text, std::string* encoding) {
+  if (IsStringASCII(text)) {
+    *encoding = std::string();
+    return true;
+  }
+
+  UErrorCode status = U_ZERO_ERROR;
+  UCharsetDetector* detector = ucsdet_open(&status);
+  ucsdet_setText(detector, text.data(), static_cast<int32_t>(text.length()),
+                 &status);
+  // TODO(jungshik): Should we check the quality of the match? A rather
+  // arbitrary number is assigned by ICU and it's hard to come up with
+  // a lower limit.
+  const UCharsetMatch* match = ucsdet_detect(detector, &status);
+  const char* detected_encoding = ucsdet_getName(match, &status);
+  // The name's storage is tied to |detector|; copy it before ucsdet_close().
+  const bool success = U_SUCCESS(status) && detected_encoding != NULL;
+  if (success)
+    *encoding = detected_encoding;
+  ucsdet_close(detector);
+
+  return success;
+}
+
+}  // namespace base
diff --git a/base/i18n/icu_encoding_detection.h b/base/i18n/icu_encoding_detection.h
new file mode 100644
index 0000000..0d8e5d8
--- /dev/null
+++ b/base/i18n/icu_encoding_detection.h
@@ -0,0 +1,19 @@
+// Copyright (c) 2010 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef BASE_I18N_ICU_ENCODING_DETECTION_H_
+#define BASE_I18N_ICU_ENCODING_DETECTION_H_
+
+#include <string>
+
+namespace base {
+
+// Detect encoding of |text| and put the name of encoding (as returned by ICU)
+// in |encoding|. For ASCII texts |encoding| will be set to an empty string.
+// Returns true on success; on failure |encoding| is left unchanged.
+bool DetectEncoding(const std::string& text, std::string* encoding);
+
+}  // namespace base
+
+#endif  // BASE_I18N_ICU_ENCODING_DETECTION_H_