From 807bc046a195e62f952f03a821525362fd818a8f Mon Sep 17 00:00:00 2001 From: "yuzo@chromium.org" Date: Wed, 15 Jul 2009 01:32:02 +0000 Subject: Fix: Linux file:// listings are sorted but not fully internationalized Changed the code such that file names are converted to UTF-16 before comparison. Ideally, the locale should be considered, but my conclusion is that ignoring it is a reasonable tradeoff. Passing a locale-aware comparator (collator) from src/chrome(or src/app?) to src/base requires too much effort, in my opinion. BUG=16179 TEST=Open a file:// URL for a directory that contains non-ascii-named files. Observe that files are sorted properly in the list, in a case-sensitive manner. Review URL: http://codereview.chromium.org/149449 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@20706 0039d316-1c4b-4281-b951-d872f2087c98 --- base/file_util_posix.cc | 79 +++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 67 insertions(+), 12 deletions(-) (limited to 'base/file_util_posix.cc') diff --git a/base/file_util_posix.cc b/base/file_util_posix.cc index 26f4b8f..58454cf 100644 --- a/base/file_util_posix.cc +++ b/base/file_util_posix.cc @@ -24,9 +24,14 @@ #include "base/basictypes.h" #include "base/eintr_wrapper.h" #include "base/file_path.h" +#include "base/lock.h" #include "base/logging.h" +#include "base/scoped_ptr.h" +#include "base/singleton.h" #include "base/string_util.h" +#include "base/sys_string_conversions.h" #include "base/time.h" +#include "unicode/coll.h" namespace { @@ -43,6 +48,68 @@ bool IsDirectory(const FTSENT* file) { } } +class LocaleAwareComparator { + public: + LocaleAwareComparator() { + UErrorCode error_code = U_ZERO_ERROR; + // Use the default collator. The default locale should have been properly + // set by the time this constructor is called. + collator_.reset(Collator::createInstance(error_code)); + DCHECK(U_SUCCESS(error_code)); + // Make it case-sensitive. + collator_->setStrength(Collator::TERTIARY); + // Note: We do not set UCOL_NORMALIZATION_MODE attribute. In other words, we + // do not pay performance penalty to guarantee sort order correctness for + // non-FCD (http://unicode.org/notes/tn5/#FCD) file names. This should be a + // reasonable tradeoff because such file names should be rare and the sort + // order doesn't change much anyway. + } + + // Note: A similar function is available in l10n_util. + // We cannot use it because base should not depend on l10n_util. + // TODO(yuzo): Move some of l10n_util to base. + int Compare(const string16& a, const string16& b) { + // We are not sure if Collator::compare is thread-safe. + // Use an AutoLock just in case. + AutoLock auto_lock(lock_); + + UErrorCode error_code = U_ZERO_ERROR; + UCollationResult result = collator_->compare( + static_cast(a.c_str()), + static_cast(a.length()), + static_cast(b.c_str()), + static_cast(b.length()), + error_code); + DCHECK(U_SUCCESS(error_code)); + return result; + } + + private: + scoped_ptr collator_; + Lock lock_; + friend struct DefaultSingletonTraits; + + DISALLOW_COPY_AND_ASSIGN(LocaleAwareComparator); +}; + +int CompareFiles(const FTSENT** a, const FTSENT** b) { + // Order lexicographically with directories before other files. + const bool a_is_dir = IsDirectory(*a); + const bool b_is_dir = IsDirectory(*b); + if (a_is_dir != b_is_dir) + return a_is_dir ? -1 : 1; + + // On linux, the file system encoding is not defined. We assume + // SysNativeMBToWide takes care of it. + // + // ICU's collator can take strings in OS native encoding. But we convert the + // strings to UTF-16 ourselves to ensure conversion consistency. + // TODO(yuzo): Perhaps we should define SysNativeMBToUTF16? + return Singleton()->Compare( + WideToUTF16(base::SysNativeMBToWide((*a)->fts_name)), + WideToUTF16(base::SysNativeMBToWide((*b)->fts_name))); +} + } // namespace namespace file_util { @@ -576,18 +643,6 @@ void FileEnumerator::GetFindInfo(FindInfo* info) { info->filename.assign(fts_ent_->fts_name); } -int CompareFiles(const FTSENT** a, const FTSENT** b) { - // Order lexicographically with directories before other files. - const bool a_is_dir = IsDirectory(*a); - const bool b_is_dir = IsDirectory(*b); - if (a_is_dir != b_is_dir) - return a_is_dir ? -1 : 1; - - // TODO(yuzo): make this internationalized when encoding detection function - // becomes available. - return base::strcasecmp((*a)->fts_name, (*b)->fts_name); -} - // As it stands, this method calls itself recursively when the next item of // the fts enumeration doesn't match (type, pattern, etc.). In the case of // large directories with many files this can be quite deep. -- cgit v1.1