summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authoryuzo@chromium.org <yuzo@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2009-07-15 01:32:02 +0000
committeryuzo@chromium.org <yuzo@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>2009-07-15 01:32:02 +0000
commit807bc046a195e62f952f03a821525362fd818a8f (patch)
tree22ca7be4b070ccbc938a1f9f077b9a14b7b5eb7f
parent5c469efa13123f160b6f276a6eeda32d4fdea66d (diff)
downloadchromium_src-807bc046a195e62f952f03a821525362fd818a8f.zip
chromium_src-807bc046a195e62f952f03a821525362fd818a8f.tar.gz
chromium_src-807bc046a195e62f952f03a821525362fd818a8f.tar.bz2
Fix: Linux file:// listings are sorted but not fully internationalized
Changed the code such that file names are converted to UTF-16 before comparison. Ideally, the locale should be considered, but my conclusion is that ignoring it is a reasonable tradeoff. Passing a locale-aware comparator (collator) from src/chrome(or src/app?) to src/base requires too much effort, in my opinion. BUG=16179 TEST=Open a file:// URL for a directory that contains non-ascii-named files. Observe that files are sorted properly in the list, in a case-sensitive manner. Review URL: http://codereview.chromium.org/149449 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@20706 0039d316-1c4b-4281-b951-d872f2087c98
-rw-r--r--base/file_util_posix.cc79
1 files changed, 67 insertions, 12 deletions
diff --git a/base/file_util_posix.cc b/base/file_util_posix.cc
index 26f4b8f..58454cf 100644
--- a/base/file_util_posix.cc
+++ b/base/file_util_posix.cc
@@ -24,9 +24,14 @@
#include "base/basictypes.h"
#include "base/eintr_wrapper.h"
#include "base/file_path.h"
+#include "base/lock.h"
#include "base/logging.h"
+#include "base/scoped_ptr.h"
+#include "base/singleton.h"
#include "base/string_util.h"
+#include "base/sys_string_conversions.h"
#include "base/time.h"
+#include "unicode/coll.h"
namespace {
@@ -43,6 +48,68 @@ bool IsDirectory(const FTSENT* file) {
}
}
+class LocaleAwareComparator {
+ public:
+ LocaleAwareComparator() {
+ UErrorCode error_code = U_ZERO_ERROR;
+ // Use the default collator. The default locale should have been properly
+ // set by the time this constructor is called.
+ collator_.reset(Collator::createInstance(error_code));
+ DCHECK(U_SUCCESS(error_code));
+ // Make it case-sensitive.
+ collator_->setStrength(Collator::TERTIARY);
+ // Note: We do not set UCOL_NORMALIZATION_MODE attribute. In other words, we
+ // do not pay performance penalty to guarantee sort order correctness for
+ // non-FCD (http://unicode.org/notes/tn5/#FCD) file names. This should be a
+ // reasonable tradeoff because such file names should be rare and the sort
+ // order doesn't change much anyway.
+ }
+
+ // Note: A similar function is available in l10n_util.
+ // We cannot use it because base should not depend on l10n_util.
+ // TODO(yuzo): Move some of l10n_util to base.
+ int Compare(const string16& a, const string16& b) {
+ // We are not sure if Collator::compare is thread-safe.
+ // Use an AutoLock just in case.
+ AutoLock auto_lock(lock_);
+
+ UErrorCode error_code = U_ZERO_ERROR;
+ UCollationResult result = collator_->compare(
+ static_cast<const UChar*>(a.c_str()),
+ static_cast<int>(a.length()),
+ static_cast<const UChar*>(b.c_str()),
+ static_cast<int>(b.length()),
+ error_code);
+ DCHECK(U_SUCCESS(error_code));
+ return result;
+ }
+
+ private:
+ scoped_ptr<Collator> collator_;
+ Lock lock_;
+ friend struct DefaultSingletonTraits<LocaleAwareComparator>;
+
+ DISALLOW_COPY_AND_ASSIGN(LocaleAwareComparator);
+};
+
+int CompareFiles(const FTSENT** a, const FTSENT** b) {
+ // Order lexicographically with directories before other files.
+ const bool a_is_dir = IsDirectory(*a);
+ const bool b_is_dir = IsDirectory(*b);
+ if (a_is_dir != b_is_dir)
+ return a_is_dir ? -1 : 1;
+
+ // On linux, the file system encoding is not defined. We assume
+ // SysNativeMBToWide takes care of it.
+ //
+ // ICU's collator can take strings in OS native encoding. But we convert the
+ // strings to UTF-16 ourselves to ensure conversion consistency.
+ // TODO(yuzo): Perhaps we should define SysNativeMBToUTF16?
+ return Singleton<LocaleAwareComparator>()->Compare(
+ WideToUTF16(base::SysNativeMBToWide((*a)->fts_name)),
+ WideToUTF16(base::SysNativeMBToWide((*b)->fts_name)));
+}
+
} // namespace
namespace file_util {
@@ -576,18 +643,6 @@ void FileEnumerator::GetFindInfo(FindInfo* info) {
info->filename.assign(fts_ent_->fts_name);
}
-int CompareFiles(const FTSENT** a, const FTSENT** b) {
- // Order lexicographically with directories before other files.
- const bool a_is_dir = IsDirectory(*a);
- const bool b_is_dir = IsDirectory(*b);
- if (a_is_dir != b_is_dir)
- return a_is_dir ? -1 : 1;
-
- // TODO(yuzo): make this internationalized when encoding detection function
- // becomes available.
- return base::strcasecmp((*a)->fts_name, (*b)->fts_name);
-}
-
// As it stands, this method calls itself recursively when the next item of
// the fts enumeration doesn't match (type, pattern, etc.). In the case of
// large directories with many files this can be quite deep.