diff options
Diffstat (limited to 'base/i18n')
-rw-r--r-- | base/i18n/file_util_icu.cc | 67 | ||||
-rw-r--r-- | base/i18n/file_util_icu.h | 8 | ||||
-rw-r--r-- | base/i18n/file_util_icu_unittest.cc | 46 |
3 files changed, 77 insertions, 44 deletions
diff --git a/base/i18n/file_util_icu.cc b/base/i18n/file_util_icu.cc index 0bc9db6..4d33e3a 100644 --- a/base/i18n/file_util_icu.cc +++ b/base/i18n/file_util_icu.cc @@ -124,50 +124,47 @@ bool IsFilenameLegal(const string16& file_name) { return Singleton<IllegalCharacters>()->containsNone(file_name); } -void ReplaceIllegalCharacters(std::wstring* file_name, int replace_char) { +void ReplaceIllegalCharactersInPath(FilePath::StringType* file_name, + char replace_char) { DCHECK(file_name); - DCHECK(!(Singleton<IllegalCharacters>()->contains(replace_char)) && - replace_char < 0x10000); + DCHECK(!(Singleton<IllegalCharacters>()->contains(replace_char))); // Remove leading and trailing whitespace. TrimWhitespace(*file_name, TRIM_ALL, file_name); - if (IsFilenameLegal(WideToUTF16(*file_name))) - return; + IllegalCharacters* illegal = Singleton<IllegalCharacters>::get(); + int cursor = 0; // The ICU macros expect an int. + while (cursor < static_cast<int>(file_name->size())) { + int char_begin = cursor; + uint32 code_point; +#if defined(OS_MACOSX) + // Mac uses UTF-8 encoding for filenames. + U8_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()), + code_point); +#elif defined(OS_WIN) + // Windows uses UTF-16 encoding for filenames. + U16_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()), + code_point); +#elif defined(OS_LINUX) + // Linux doesn't actually define an encoding. It basically allows anything + // except for a few special ASCII characters. + unsigned char cur_char = static_cast<unsigned char>((*file_name)[cursor++]); + if (cur_char >= 0x80) + continue; + code_point = cur_char; +#else + NOTREACHED(); +#endif - std::wstring::size_type i = 0; - std::wstring::size_type length = file_name->size(); - const wchar_t* wstr = file_name->data(); -#if defined(WCHAR_T_IS_UTF16) - // Using |span| method of UnicodeSet might speed things up a bit, but - // it's not likely to matter here. - std::wstring temp; - temp.reserve(length); - while (i < length) { - UChar32 ucs4; - std::wstring::size_type prev = i; - U16_NEXT(wstr, i, length, ucs4); - if (Singleton<IllegalCharacters>()->contains(ucs4)) { - temp.push_back(replace_char); - } else if (ucs4 < 0x10000) { - temp.push_back(ucs4); - } else { - temp.push_back(wstr[prev]); - temp.push_back(wstr[prev + 1]); + if (illegal->contains(code_point)) { + file_name->replace(char_begin, cursor - char_begin, 1, replace_char); + // We just made the potentially multi-byte/word char into one that only + // takes one byte/word, so need to adjust the cursor to point to the next + // character again. + cursor = char_begin + 1; } } - file_name->swap(temp); -#elif defined(WCHAR_T_IS_UTF32) - while (i < length) { - if (Singleton<IllegalCharacters>()->contains(wstr[i])) { - (*file_name)[i] = replace_char; - } - ++i; - } -#else -#error wchar_t* should be either UTF-16 or UTF-32 -#endif } bool LocaleAwareCompareFilenames(const FilePath& a, const FilePath& b) { diff --git a/base/i18n/file_util_icu.h b/base/i18n/file_util_icu.h index c309a9e..54ddb08 100644 --- a/base/i18n/file_util_icu.h +++ b/base/i18n/file_util_icu.h @@ -6,6 +6,7 @@ #include <string> +#include "base/file_path.h" #include "base/string16.h" class FilePath; @@ -18,12 +19,13 @@ bool IsFilenameLegal(const string16& file_name); // Replaces characters in 'file_name' that are illegal for file names with // 'replace_char'. 'file_name' must not be a full or relative path, but just the -// file name component. Any leading or trailing whitespace in 'file_name' is -// removed. +// file name component (since slashes are considered illegal). Any leading or +// trailing whitespace in 'file_name' is removed. // Example: // file_name == "bad:file*name?.txt", changed to: "bad-file-name-.txt" when // 'replace_char' is '-'. -void ReplaceIllegalCharacters(std::wstring* file_name, int replace_char); +void ReplaceIllegalCharactersInPath(FilePath::StringType* file_name, + char replace_char); // Compares two filenames using the current locale information. This can be // used to sort directory listings. It behaves like "operator<" for use in diff --git a/base/i18n/file_util_icu_unittest.cc b/base/i18n/file_util_icu_unittest.cc index aebcd0df..b46fe55 100644 --- a/base/i18n/file_util_icu_unittest.cc +++ b/base/i18n/file_util_icu_unittest.cc @@ -6,6 +6,7 @@ #include "base/file_util.h" #include "base/path_service.h" +#include "base/utf_string_conversions.h" #include "testing/gtest/include/gtest/gtest.h" #include "testing/platform_test.h" @@ -34,9 +35,34 @@ class FileUtilICUTest : public PlatformTest { FilePath test_dir_; }; +#if defined(OS_LINUX) + +// Linux disallows some evil ASCII characters, but passes all non-ASCII. +static const struct goodbad_pair { + const char* bad_name; + const char* good_name; +} kIllegalCharacterCases[] = { + {"bad*file:name?.jpg", "bad-file-name-.jpg"}, + {"**********::::.txt", "--------------.txt"}, + {"\xe9\xf0zzzz.\xff", "\xe9\xf0zzzz.\xff"}, +}; + +TEST_F(FileUtilICUTest, ReplaceIllegalCharacersInPathLinuxTest) { + for (size_t i = 0; i < arraysize(kIllegalCharacterCases); ++i) { + std::string bad_name(kIllegalCharacterCases[i].bad_name); + file_util::ReplaceIllegalCharactersInPath(&bad_name, '-'); + EXPECT_EQ(kIllegalCharacterCases[i].good_name, bad_name); + } +} + +#else + +// For Mac & Windows, which both do Unicode validation on filenames. These +// characters are given as wide strings since its more convenient to specify +// unicode characters. For Mac they should be converted to UTF-8. static const struct goodbad_pair { - std::wstring bad_name; - std::wstring good_name; + const wchar_t* bad_name; + const wchar_t* good_name; } kIllegalCharacterCases[] = { {L"bad*file:name?.jpg", L"bad-file-name-.jpg"}, {L"**********::::.txt", L"--------------.txt"}, @@ -46,7 +72,7 @@ static const struct goodbad_pair { #if defined(OS_WIN) {L"bad*file\\name.jpg", L"bad-file-name.jpg"}, {L"\t bad*file\\name/.jpg ", L"bad-file-name-.jpg"}, -#elif defined(OS_POSIX) +#elif defined(OS_MACOSX) {L"bad*file?name.jpg", L"bad-file-name.jpg"}, {L"\t bad*file?name/.jpg ", L"bad-file-name-.jpg"}, #endif @@ -61,11 +87,19 @@ static const struct goodbad_pair { {L"bad\uFDD0file\uFDEFname.jpg ", L"bad-file-name.jpg"}, }; -TEST_F(FileUtilICUTest, ReplaceIllegalCharactersTest) { - for (unsigned int i = 0; i < arraysize(kIllegalCharacterCases); ++i) { +TEST_F(FileUtilICUTest, ReplaceIllegalCharactersInPathTest) { + for (size_t i = 0; i < arraysize(kIllegalCharacterCases); ++i) { +#if defined(OS_WIN) std::wstring bad_name(kIllegalCharacterCases[i].bad_name); - file_util::ReplaceIllegalCharacters(&bad_name, L'-'); + file_util::ReplaceIllegalCharactersInPath(&bad_name, '-'); EXPECT_EQ(kIllegalCharacterCases[i].good_name, bad_name); +#elif defined(OS_MACOSX) + std::string bad_name(WideToUTF8(kIllegalCharacterCases[i].bad_name)); + file_util::ReplaceIllegalCharactersInPath(&bad_name, '-'); + EXPECT_EQ(WideToUTF8(kIllegalCharacterCases[i].good_name), bad_name); +#endif } } +#endif + |