3 files changed, 77 insertions, 44 deletions
diff --git a/base/i18n/file_util_icu.cc b/base/i18n/file_util_icu.cc
index 0bc9db6..4d33e3a 100644
--- a/base/i18n/file_util_icu.cc
+++ b/base/i18n/file_util_icu.cc
@@ -124,50 +124,47 @@ bool IsFilenameLegal(const string16& file_name) {
   return Singleton<IllegalCharacters>()->containsNone(file_name);
 }
 
-void ReplaceIllegalCharacters(std::wstring* file_name, int replace_char) {
+void ReplaceIllegalCharactersInPath(FilePath::StringType* file_name,
+                                    char replace_char) {
   DCHECK(file_name);
 
-  DCHECK(!(Singleton<IllegalCharacters>()->contains(replace_char)) &&
-         replace_char < 0x10000);
+  DCHECK(!(Singleton<IllegalCharacters>()->contains(replace_char)));
 
   // Remove leading and trailing whitespace.
   TrimWhitespace(*file_name, TRIM_ALL, file_name);
 
-  if (IsFilenameLegal(WideToUTF16(*file_name)))
-    return;
+  IllegalCharacters* illegal = Singleton<IllegalCharacters>::get();
+  int cursor = 0;  // The ICU macros expect an int.
+  while (cursor < static_cast<int>(file_name->size())) {
+    int char_begin = cursor;
+    uint32 code_point;
+#if defined(OS_MACOSX)
+    // Mac uses UTF-8 encoding for filenames.
+    U8_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()),
+            code_point);
+#elif defined(OS_WIN)
+    // Windows uses UTF-16 encoding for filenames.
+    U16_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()),
+             code_point);
+#elif defined(OS_LINUX)
+    // Linux doesn't actually define an encoding. It basically allows anything
+    // except for a few special ASCII characters.
+    unsigned char cur_char = static_cast<unsigned char>((*file_name)[cursor++]);
+    if (cur_char >= 0x80)
+      continue;
+    code_point = cur_char;
+#else
+    NOTREACHED();
+#endif
 
-  std::wstring::size_type i = 0;
-  std::wstring::size_type length = file_name->size();
-  const wchar_t* wstr = file_name->data();
-#if defined(WCHAR_T_IS_UTF16)
-  // Using |span| method of UnicodeSet might speed things up a bit, but
-  // it's not likely to matter here.
-  std::wstring temp;
-  temp.reserve(length);
-  while (i < length) {
-    UChar32 ucs4;
-    std::wstring::size_type prev = i;
-    U16_NEXT(wstr, i, length, ucs4);
-    if (Singleton<IllegalCharacters>()->contains(ucs4)) {
-      temp.push_back(replace_char);
-    } else if (ucs4 < 0x10000) {
-      temp.push_back(ucs4);
-    } else {
-      temp.push_back(wstr[prev]);
-      temp.push_back(wstr[prev + 1]);
+    if (illegal->contains(code_point)) {
+      file_name->replace(char_begin, cursor - char_begin, 1, replace_char);
+      // We just made the potentially multi-byte/word char into one that only
+      // takes one byte/word, so need to adjust the cursor to point to the next
+      // character again.
+      cursor = char_begin + 1;
     }
   }
-  file_name->swap(temp);
-#elif defined(WCHAR_T_IS_UTF32)
-  while (i < length) {
-    if (Singleton<IllegalCharacters>()->contains(wstr[i])) {
-      (*file_name)[i] = replace_char;
-    }
-    ++i;
-  }
-#else
-#error wchar_t* should be either UTF-16 or UTF-32
-#endif
 }
 
 bool LocaleAwareCompareFilenames(const FilePath& a, const FilePath& b) {
diff --git a/base/i18n/file_util_icu.h b/base/i18n/file_util_icu.h
index c309a9e..54ddb08 100644
--- a/base/i18n/file_util_icu.h
+++ b/base/i18n/file_util_icu.h
@@ -6,6 +6,7 @@
 
 #include <string>
 
+#include "base/file_path.h"
 #include "base/string16.h"
 
 class FilePath;
@@ -18,12 +19,13 @@ bool IsFilenameLegal(const string16& file_name);
 
 // Replaces characters in 'file_name' that are illegal for file names with
 // 'replace_char'. 'file_name' must not be a full or relative path, but just the
-// file name component. Any leading or trailing whitespace in 'file_name' is
-// removed.
+// file name component (since slashes are considered illegal). Any leading or
+// trailing whitespace in 'file_name' is removed.
 // Example:
 //   file_name == "bad:file*name?.txt", changed to: "bad-file-name-.txt" when
 //   'replace_char' is '-'.
-void ReplaceIllegalCharacters(std::wstring* file_name, int replace_char);
+void ReplaceIllegalCharactersInPath(FilePath::StringType* file_name,
+                                    char replace_char);
 
 // Compares two filenames using the current locale information. This can be
 // used to sort directory listings. It behaves like "operator<" for use in
diff --git a/base/i18n/file_util_icu_unittest.cc b/base/i18n/file_util_icu_unittest.cc
index aebcd0df..b46fe55 100644
--- a/base/i18n/file_util_icu_unittest.cc
+++ b/base/i18n/file_util_icu_unittest.cc
@@ -6,6 +6,7 @@
 
 #include "base/file_util.h"
 #include "base/path_service.h"
+#include "base/utf_string_conversions.h"
 #include "testing/gtest/include/gtest/gtest.h"
 #include "testing/platform_test.h"
 
@@ -34,9 +35,34 @@ class FileUtilICUTest : public PlatformTest {
   FilePath test_dir_;
 };
 
+#if defined(OS_LINUX)
+
+// Linux disallows some evil ASCII characters, but passes all non-ASCII.
+static const struct goodbad_pair {
+  const char* bad_name;
+  const char* good_name;
+} kIllegalCharacterCases[] = {
+  {"bad*file:name?.jpg", "bad-file-name-.jpg"},
+  {"**********::::.txt", "--------------.txt"},
+  {"\xe9\xf0zzzz.\xff", "\xe9\xf0zzzz.\xff"},
+};
+
+TEST_F(FileUtilICUTest, ReplaceIllegalCharacersInPathLinuxTest) {
+  for (size_t i = 0; i < arraysize(kIllegalCharacterCases); ++i) {
+    std::string bad_name(kIllegalCharacterCases[i].bad_name);
+    file_util::ReplaceIllegalCharactersInPath(&bad_name, '-');
+    EXPECT_EQ(kIllegalCharacterCases[i].good_name, bad_name);
+  }
+}
+
+#else
+
+// For Mac & Windows, which both do Unicode validation on filenames. These
+// characters are given as wide strings since its more convenient to specify
+// unicode characters. For Mac they should be converted to UTF-8.
 static const struct goodbad_pair {
-  std::wstring bad_name;
-  std::wstring good_name;
+  const wchar_t* bad_name;
+  const wchar_t* good_name;
 } kIllegalCharacterCases[] = {
   {L"bad*file:name?.jpg", L"bad-file-name-.jpg"},
   {L"**********::::.txt", L"--------------.txt"},
@@ -46,7 +72,7 @@ static const struct goodbad_pair {
 #if defined(OS_WIN)
   {L"bad*file\\name.jpg", L"bad-file-name.jpg"},
   {L"\t  bad*file\\name/.jpg ", L"bad-file-name-.jpg"},
-#elif defined(OS_POSIX)
+#elif defined(OS_MACOSX)
   {L"bad*file?name.jpg", L"bad-file-name.jpg"},
   {L"\t  bad*file?name/.jpg ", L"bad-file-name-.jpg"},
 #endif
@@ -61,11 +87,19 @@ static const struct goodbad_pair {
   {L"bad\uFDD0file\uFDEFname.jpg ", L"bad-file-name.jpg"},
 };
 
-TEST_F(FileUtilICUTest, ReplaceIllegalCharactersTest) {
-  for (unsigned int i = 0; i < arraysize(kIllegalCharacterCases); ++i) {
+TEST_F(FileUtilICUTest, ReplaceIllegalCharactersInPathTest) {
+  for (size_t i = 0; i < arraysize(kIllegalCharacterCases); ++i) {
+#if defined(OS_WIN)
     std::wstring bad_name(kIllegalCharacterCases[i].bad_name);
-    file_util::ReplaceIllegalCharacters(&bad_name, L'-');
+    file_util::ReplaceIllegalCharactersInPath(&bad_name, '-');
     EXPECT_EQ(kIllegalCharacterCases[i].good_name, bad_name);
+#elif defined(OS_MACOSX)
+    std::string bad_name(WideToUTF8(kIllegalCharacterCases[i].bad_name));
+    file_util::ReplaceIllegalCharactersInPath(&bad_name, '-');
+    EXPECT_EQ(WideToUTF8(kIllegalCharacterCases[i].good_name), bad_name);
+#endif
   }
 }
 
+#endif
+