diff options
author | yuzo@chromium.org <yuzo@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-07-15 01:32:02 +0000 |
---|---|---|
committer | yuzo@chromium.org <yuzo@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-07-15 01:32:02 +0000 |
commit | 807bc046a195e62f952f03a821525362fd818a8f (patch) | |
tree | 22ca7be4b070ccbc938a1f9f077b9a14b7b5eb7f /base/file_util_posix.cc | |
parent | 5c469efa13123f160b6f276a6eeda32d4fdea66d (diff) | |
download | chromium_src-807bc046a195e62f952f03a821525362fd818a8f.zip chromium_src-807bc046a195e62f952f03a821525362fd818a8f.tar.gz chromium_src-807bc046a195e62f952f03a821525362fd818a8f.tar.bz2 |
Fix: Linux file:// listings are sorted but not fully internationalized
Changed the code such that file names are converted to UTF-16 before
comparison. Ideally, the locale should be considered, but my conclusion is that
ignoring it is a reasonable tradeoff. Passing a locale-aware comparator
(collator) from src/chrome(or src/app?) to src/base requires too much effort, in
my opinion.
BUG=16179
TEST=Open a file:// URL for a directory that contains non-ascii-named files.
Observe that files are sorted properly in the list, in a case-sensitive manner.
Review URL: http://codereview.chromium.org/149449
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@20706 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'base/file_util_posix.cc')
-rw-r--r-- | base/file_util_posix.cc | 79 |
1 files changed, 67 insertions, 12 deletions
diff --git a/base/file_util_posix.cc b/base/file_util_posix.cc index 26f4b8f..58454cf 100644 --- a/base/file_util_posix.cc +++ b/base/file_util_posix.cc @@ -24,9 +24,14 @@ #include "base/basictypes.h" #include "base/eintr_wrapper.h" #include "base/file_path.h" +#include "base/lock.h" #include "base/logging.h" +#include "base/scoped_ptr.h" +#include "base/singleton.h" #include "base/string_util.h" +#include "base/sys_string_conversions.h" #include "base/time.h" +#include "unicode/coll.h" namespace { @@ -43,6 +48,68 @@ bool IsDirectory(const FTSENT* file) { } } +class LocaleAwareComparator { + public: + LocaleAwareComparator() { + UErrorCode error_code = U_ZERO_ERROR; + // Use the default collator. The default locale should have been properly + // set by the time this constructor is called. + collator_.reset(Collator::createInstance(error_code)); + DCHECK(U_SUCCESS(error_code)); + // Make it case-sensitive. + collator_->setStrength(Collator::TERTIARY); + // Note: We do not set UCOL_NORMALIZATION_MODE attribute. In other words, we + // do not pay performance penalty to guarantee sort order correctness for + // non-FCD (http://unicode.org/notes/tn5/#FCD) file names. This should be a + // reasonable tradeoff because such file names should be rare and the sort + // order doesn't change much anyway. + } + + // Note: A similar function is available in l10n_util. + // We cannot use it because base should not depend on l10n_util. + // TODO(yuzo): Move some of l10n_util to base. + int Compare(const string16& a, const string16& b) { + // We are not sure if Collator::compare is thread-safe. + // Use an AutoLock just in case. + AutoLock auto_lock(lock_); + + UErrorCode error_code = U_ZERO_ERROR; + UCollationResult result = collator_->compare( + static_cast<const UChar*>(a.c_str()), + static_cast<int>(a.length()), + static_cast<const UChar*>(b.c_str()), + static_cast<int>(b.length()), + error_code); + DCHECK(U_SUCCESS(error_code)); + return result; + } + + private: + scoped_ptr<Collator> collator_; + Lock lock_; + friend struct DefaultSingletonTraits<LocaleAwareComparator>; + + DISALLOW_COPY_AND_ASSIGN(LocaleAwareComparator); +}; + +int CompareFiles(const FTSENT** a, const FTSENT** b) { + // Order lexicographically with directories before other files. + const bool a_is_dir = IsDirectory(*a); + const bool b_is_dir = IsDirectory(*b); + if (a_is_dir != b_is_dir) + return a_is_dir ? -1 : 1; + + // On linux, the file system encoding is not defined. We assume + // SysNativeMBToWide takes care of it. + // + // ICU's collator can take strings in OS native encoding. But we convert the + // strings to UTF-16 ourselves to ensure conversion consistency. + // TODO(yuzo): Perhaps we should define SysNativeMBToUTF16? + return Singleton<LocaleAwareComparator>()->Compare( + WideToUTF16(base::SysNativeMBToWide((*a)->fts_name)), + WideToUTF16(base::SysNativeMBToWide((*b)->fts_name))); +} + } // namespace namespace file_util { @@ -576,18 +643,6 @@ void FileEnumerator::GetFindInfo(FindInfo* info) { info->filename.assign(fts_ent_->fts_name); } -int CompareFiles(const FTSENT** a, const FTSENT** b) { - // Order lexicographically with directories before other files. - const bool a_is_dir = IsDirectory(*a); - const bool b_is_dir = IsDirectory(*b); - if (a_is_dir != b_is_dir) - return a_is_dir ? -1 : 1; - - // TODO(yuzo): make this internationalized when encoding detection function - // becomes available. - return base::strcasecmp((*a)->fts_name, (*b)->fts_name); -} - // As it stands, this method calls itself recursively when the next item of // the fts enumeration doesn't match (type, pattern, etc.). In the case of // large directories with many files this can be quite deep. |