summaryrefslogtreecommitdiffstats
path: root/base/i18n
diff options
context:
space:
mode:
Diffstat (limited to 'base/i18n')
-rw-r--r--base/i18n/file_util_icu.cc67
-rw-r--r--base/i18n/file_util_icu.h8
-rw-r--r--base/i18n/file_util_icu_unittest.cc46
3 files changed, 77 insertions, 44 deletions
diff --git a/base/i18n/file_util_icu.cc b/base/i18n/file_util_icu.cc
index 0bc9db6..4d33e3a 100644
--- a/base/i18n/file_util_icu.cc
+++ b/base/i18n/file_util_icu.cc
@@ -124,50 +124,47 @@ bool IsFilenameLegal(const string16& file_name) {
return Singleton<IllegalCharacters>()->containsNone(file_name);
}
-void ReplaceIllegalCharacters(std::wstring* file_name, int replace_char) {
+void ReplaceIllegalCharactersInPath(FilePath::StringType* file_name,
+ char replace_char) {
DCHECK(file_name);
- DCHECK(!(Singleton<IllegalCharacters>()->contains(replace_char)) &&
- replace_char < 0x10000);
+ DCHECK(!(Singleton<IllegalCharacters>()->contains(replace_char)));
// Remove leading and trailing whitespace.
TrimWhitespace(*file_name, TRIM_ALL, file_name);
- if (IsFilenameLegal(WideToUTF16(*file_name)))
- return;
+ IllegalCharacters* illegal = Singleton<IllegalCharacters>::get();
+ int cursor = 0; // The ICU macros expect an int.
+ while (cursor < static_cast<int>(file_name->size())) {
+ int char_begin = cursor;
+ uint32 code_point;
+#if defined(OS_MACOSX)
+ // Mac uses UTF-8 encoding for filenames.
+ U8_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()),
+ code_point);
+#elif defined(OS_WIN)
+ // Windows uses UTF-16 encoding for filenames.
+ U16_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()),
+ code_point);
+#elif defined(OS_LINUX)
+ // Linux doesn't actually define an encoding. It basically allows anything
+ // except for a few special ASCII characters.
+ unsigned char cur_char = static_cast<unsigned char>((*file_name)[cursor++]);
+ if (cur_char >= 0x80)
+ continue;
+ code_point = cur_char;
+#else
+ NOTREACHED();
+#endif
- std::wstring::size_type i = 0;
- std::wstring::size_type length = file_name->size();
- const wchar_t* wstr = file_name->data();
-#if defined(WCHAR_T_IS_UTF16)
- // Using |span| method of UnicodeSet might speed things up a bit, but
- // it's not likely to matter here.
- std::wstring temp;
- temp.reserve(length);
- while (i < length) {
- UChar32 ucs4;
- std::wstring::size_type prev = i;
- U16_NEXT(wstr, i, length, ucs4);
- if (Singleton<IllegalCharacters>()->contains(ucs4)) {
- temp.push_back(replace_char);
- } else if (ucs4 < 0x10000) {
- temp.push_back(ucs4);
- } else {
- temp.push_back(wstr[prev]);
- temp.push_back(wstr[prev + 1]);
+ if (illegal->contains(code_point)) {
+ file_name->replace(char_begin, cursor - char_begin, 1, replace_char);
+ // We just made the potentially multi-byte/word char into one that only
+ // takes one byte/word, so need to adjust the cursor to point to the next
+ // character again.
+ cursor = char_begin + 1;
}
}
- file_name->swap(temp);
-#elif defined(WCHAR_T_IS_UTF32)
- while (i < length) {
- if (Singleton<IllegalCharacters>()->contains(wstr[i])) {
- (*file_name)[i] = replace_char;
- }
- ++i;
- }
-#else
-#error wchar_t* should be either UTF-16 or UTF-32
-#endif
}
bool LocaleAwareCompareFilenames(const FilePath& a, const FilePath& b) {
diff --git a/base/i18n/file_util_icu.h b/base/i18n/file_util_icu.h
index c309a9e..54ddb08 100644
--- a/base/i18n/file_util_icu.h
+++ b/base/i18n/file_util_icu.h
@@ -6,6 +6,7 @@
#include <string>
+#include "base/file_path.h"
#include "base/string16.h"
class FilePath;
@@ -18,12 +19,13 @@ bool IsFilenameLegal(const string16& file_name);
// Replaces characters in 'file_name' that are illegal for file names with
// 'replace_char'. 'file_name' must not be a full or relative path, but just the
-// file name component. Any leading or trailing whitespace in 'file_name' is
-// removed.
+// file name component (since slashes are considered illegal). Any leading or
+// trailing whitespace in 'file_name' is removed.
// Example:
// file_name == "bad:file*name?.txt", changed to: "bad-file-name-.txt" when
// 'replace_char' is '-'.
-void ReplaceIllegalCharacters(std::wstring* file_name, int replace_char);
+void ReplaceIllegalCharactersInPath(FilePath::StringType* file_name,
+ char replace_char);
// Compares two filenames using the current locale information. This can be
// used to sort directory listings. It behaves like "operator<" for use in
diff --git a/base/i18n/file_util_icu_unittest.cc b/base/i18n/file_util_icu_unittest.cc
index aebcd0df..b46fe55 100644
--- a/base/i18n/file_util_icu_unittest.cc
+++ b/base/i18n/file_util_icu_unittest.cc
@@ -6,6 +6,7 @@
#include "base/file_util.h"
#include "base/path_service.h"
+#include "base/utf_string_conversions.h"
#include "testing/gtest/include/gtest/gtest.h"
#include "testing/platform_test.h"
@@ -34,9 +35,34 @@ class FileUtilICUTest : public PlatformTest {
FilePath test_dir_;
};
+#if defined(OS_LINUX)
+
+// Linux disallows some evil ASCII characters, but passes all non-ASCII.
+static const struct goodbad_pair {
+ const char* bad_name;
+ const char* good_name;
+} kIllegalCharacterCases[] = {
+ {"bad*file:name?.jpg", "bad-file-name-.jpg"},
+ {"**********::::.txt", "--------------.txt"},
+ {"\xe9\xf0zzzz.\xff", "\xe9\xf0zzzz.\xff"},
+};
+
+TEST_F(FileUtilICUTest, ReplaceIllegalCharacersInPathLinuxTest) {
+ for (size_t i = 0; i < arraysize(kIllegalCharacterCases); ++i) {
+ std::string bad_name(kIllegalCharacterCases[i].bad_name);
+ file_util::ReplaceIllegalCharactersInPath(&bad_name, '-');
+ EXPECT_EQ(kIllegalCharacterCases[i].good_name, bad_name);
+ }
+}
+
+#else
+
+// For Mac & Windows, which both do Unicode validation on filenames. These
+// characters are given as wide strings since its more convenient to specify
+// unicode characters. For Mac they should be converted to UTF-8.
static const struct goodbad_pair {
- std::wstring bad_name;
- std::wstring good_name;
+ const wchar_t* bad_name;
+ const wchar_t* good_name;
} kIllegalCharacterCases[] = {
{L"bad*file:name?.jpg", L"bad-file-name-.jpg"},
{L"**********::::.txt", L"--------------.txt"},
@@ -46,7 +72,7 @@ static const struct goodbad_pair {
#if defined(OS_WIN)
{L"bad*file\\name.jpg", L"bad-file-name.jpg"},
{L"\t bad*file\\name/.jpg ", L"bad-file-name-.jpg"},
-#elif defined(OS_POSIX)
+#elif defined(OS_MACOSX)
{L"bad*file?name.jpg", L"bad-file-name.jpg"},
{L"\t bad*file?name/.jpg ", L"bad-file-name-.jpg"},
#endif
@@ -61,11 +87,19 @@ static const struct goodbad_pair {
{L"bad\uFDD0file\uFDEFname.jpg ", L"bad-file-name.jpg"},
};
-TEST_F(FileUtilICUTest, ReplaceIllegalCharactersTest) {
- for (unsigned int i = 0; i < arraysize(kIllegalCharacterCases); ++i) {
+TEST_F(FileUtilICUTest, ReplaceIllegalCharactersInPathTest) {
+ for (size_t i = 0; i < arraysize(kIllegalCharacterCases); ++i) {
+#if defined(OS_WIN)
std::wstring bad_name(kIllegalCharacterCases[i].bad_name);
- file_util::ReplaceIllegalCharacters(&bad_name, L'-');
+ file_util::ReplaceIllegalCharactersInPath(&bad_name, '-');
EXPECT_EQ(kIllegalCharacterCases[i].good_name, bad_name);
+#elif defined(OS_MACOSX)
+ std::string bad_name(WideToUTF8(kIllegalCharacterCases[i].bad_name));
+ file_util::ReplaceIllegalCharactersInPath(&bad_name, '-');
+ EXPECT_EQ(WideToUTF8(kIllegalCharacterCases[i].good_name), bad_name);
+#endif
}
}
+#endif
+