diff options
author | brettw@chromium.org <brettw@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-10-22 23:06:12 +0000 |
---|---|---|
committer | brettw@chromium.org <brettw@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-10-22 23:06:12 +0000 |
commit | de29433580baeecc204edc9ca0c2aa47c51aa93a (patch) | |
tree | ec55f655f63f7e09fb373c1e7d784c2e721c6119 /base | |
parent | 2c1639589b5932b565c9d420cb79b56a4212a706 (diff) | |
download | chromium_src-de29433580baeecc204edc9ca0c2aa47c51aa93a.zip chromium_src-de29433580baeecc204edc9ca0c2aa47c51aa93a.tar.gz chromium_src-de29433580baeecc204edc9ca0c2aa47c51aa93a.tar.bz2 |
Do some cleanup of file path name handling.
This started trying to cleanup DownloadManager::GenerateFilename which asserts
if your system locale isn't UTF-8 (I ran into this when mine got messed up).
The solution is to have GetSuggestedFilename return a FilePath rather than
calling FromWStringHack.
The rest of the patch is a result of trying to write GetSuggestedFilename in a
reasonable way. I changed ReplaceIllegalCharacters to work on a
FilePath::StringType.
Some places in the code calling these functions got cleaner, some got messier.
I think overall the ones that got messier are the ones doing sketchy things
with paths and the ones that got cleaner are the ones doing things more
properly.
The only code here that gets called a nontrivial number of times is the
weburlloader, and I think the new code does about the same number of string
conversions overall (though on certain platforms the number will be higher or
lower).
BUG=none
TEST=none
Review URL: http://codereview.chromium.org/271056
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@29832 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'base')
-rw-r--r-- | base/i18n/file_util_icu.cc | 67 | ||||
-rw-r--r-- | base/i18n/file_util_icu.h | 8 | ||||
-rw-r--r-- | base/i18n/file_util_icu_unittest.cc | 46 |
3 files changed, 77 insertions, 44 deletions
diff --git a/base/i18n/file_util_icu.cc b/base/i18n/file_util_icu.cc index 0bc9db6..4d33e3a 100644 --- a/base/i18n/file_util_icu.cc +++ b/base/i18n/file_util_icu.cc @@ -124,50 +124,47 @@ bool IsFilenameLegal(const string16& file_name) { return Singleton<IllegalCharacters>()->containsNone(file_name); } -void ReplaceIllegalCharacters(std::wstring* file_name, int replace_char) { +void ReplaceIllegalCharactersInPath(FilePath::StringType* file_name, + char replace_char) { DCHECK(file_name); - DCHECK(!(Singleton<IllegalCharacters>()->contains(replace_char)) && - replace_char < 0x10000); + DCHECK(!(Singleton<IllegalCharacters>()->contains(replace_char))); // Remove leading and trailing whitespace. TrimWhitespace(*file_name, TRIM_ALL, file_name); - if (IsFilenameLegal(WideToUTF16(*file_name))) - return; + IllegalCharacters* illegal = Singleton<IllegalCharacters>::get(); + int cursor = 0; // The ICU macros expect an int. + while (cursor < static_cast<int>(file_name->size())) { + int char_begin = cursor; + uint32 code_point; +#if defined(OS_MACOSX) + // Mac uses UTF-8 encoding for filenames. + U8_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()), + code_point); +#elif defined(OS_WIN) + // Windows uses UTF-16 encoding for filenames. + U16_NEXT(file_name->data(), cursor, static_cast<int>(file_name->length()), + code_point); +#elif defined(OS_LINUX) + // Linux doesn't actually define an encoding. It basically allows anything + // except for a few special ASCII characters. + unsigned char cur_char = static_cast<unsigned char>((*file_name)[cursor++]); + if (cur_char >= 0x80) + continue; + code_point = cur_char; +#else + NOTREACHED(); +#endif - std::wstring::size_type i = 0; - std::wstring::size_type length = file_name->size(); - const wchar_t* wstr = file_name->data(); -#if defined(WCHAR_T_IS_UTF16) - // Using |span| method of UnicodeSet might speed things up a bit, but - // it's not likely to matter here. - std::wstring temp; - temp.reserve(length); - while (i < length) { - UChar32 ucs4; - std::wstring::size_type prev = i; - U16_NEXT(wstr, i, length, ucs4); - if (Singleton<IllegalCharacters>()->contains(ucs4)) { - temp.push_back(replace_char); - } else if (ucs4 < 0x10000) { - temp.push_back(ucs4); - } else { - temp.push_back(wstr[prev]); - temp.push_back(wstr[prev + 1]); + if (illegal->contains(code_point)) { + file_name->replace(char_begin, cursor - char_begin, 1, replace_char); + // We just made the potentially multi-byte/word char into one that only + // takes one byte/word, so need to adjust the cursor to point to the next + // character again. + cursor = char_begin + 1; } } - file_name->swap(temp); -#elif defined(WCHAR_T_IS_UTF32) - while (i < length) { - if (Singleton<IllegalCharacters>()->contains(wstr[i])) { - (*file_name)[i] = replace_char; - } - ++i; - } -#else -#error wchar_t* should be either UTF-16 or UTF-32 -#endif } bool LocaleAwareCompareFilenames(const FilePath& a, const FilePath& b) { diff --git a/base/i18n/file_util_icu.h b/base/i18n/file_util_icu.h index c309a9e..54ddb08 100644 --- a/base/i18n/file_util_icu.h +++ b/base/i18n/file_util_icu.h @@ -6,6 +6,7 @@ #include <string> +#include "base/file_path.h" #include "base/string16.h" class FilePath; @@ -18,12 +19,13 @@ bool IsFilenameLegal(const string16& file_name); // Replaces characters in 'file_name' that are illegal for file names with // 'replace_char'. 'file_name' must not be a full or relative path, but just the -// file name component. Any leading or trailing whitespace in 'file_name' is -// removed. +// file name component (since slashes are considered illegal). Any leading or +// trailing whitespace in 'file_name' is removed. // Example: // file_name == "bad:file*name?.txt", changed to: "bad-file-name-.txt" when // 'replace_char' is '-'. -void ReplaceIllegalCharacters(std::wstring* file_name, int replace_char); +void ReplaceIllegalCharactersInPath(FilePath::StringType* file_name, + char replace_char); // Compares two filenames using the current locale information. This can be // used to sort directory listings. It behaves like "operator<" for use in diff --git a/base/i18n/file_util_icu_unittest.cc b/base/i18n/file_util_icu_unittest.cc index aebcd0df..b46fe55 100644 --- a/base/i18n/file_util_icu_unittest.cc +++ b/base/i18n/file_util_icu_unittest.cc @@ -6,6 +6,7 @@ #include "base/file_util.h" #include "base/path_service.h" +#include "base/utf_string_conversions.h" #include "testing/gtest/include/gtest/gtest.h" #include "testing/platform_test.h" @@ -34,9 +35,34 @@ class FileUtilICUTest : public PlatformTest { FilePath test_dir_; }; +#if defined(OS_LINUX) + +// Linux disallows some evil ASCII characters, but passes all non-ASCII. +static const struct goodbad_pair { + const char* bad_name; + const char* good_name; +} kIllegalCharacterCases[] = { + {"bad*file:name?.jpg", "bad-file-name-.jpg"}, + {"**********::::.txt", "--------------.txt"}, + {"\xe9\xf0zzzz.\xff", "\xe9\xf0zzzz.\xff"}, +}; + +TEST_F(FileUtilICUTest, ReplaceIllegalCharacersInPathLinuxTest) { + for (size_t i = 0; i < arraysize(kIllegalCharacterCases); ++i) { + std::string bad_name(kIllegalCharacterCases[i].bad_name); + file_util::ReplaceIllegalCharactersInPath(&bad_name, '-'); + EXPECT_EQ(kIllegalCharacterCases[i].good_name, bad_name); + } +} + +#else + +// For Mac & Windows, which both do Unicode validation on filenames. These +// characters are given as wide strings since its more convenient to specify +// unicode characters. For Mac they should be converted to UTF-8. static const struct goodbad_pair { - std::wstring bad_name; - std::wstring good_name; + const wchar_t* bad_name; + const wchar_t* good_name; } kIllegalCharacterCases[] = { {L"bad*file:name?.jpg", L"bad-file-name-.jpg"}, {L"**********::::.txt", L"--------------.txt"}, @@ -46,7 +72,7 @@ static const struct goodbad_pair { #if defined(OS_WIN) {L"bad*file\\name.jpg", L"bad-file-name.jpg"}, {L"\t bad*file\\name/.jpg ", L"bad-file-name-.jpg"}, -#elif defined(OS_POSIX) +#elif defined(OS_MACOSX) {L"bad*file?name.jpg", L"bad-file-name.jpg"}, {L"\t bad*file?name/.jpg ", L"bad-file-name-.jpg"}, #endif @@ -61,11 +87,19 @@ static const struct goodbad_pair { {L"bad\uFDD0file\uFDEFname.jpg ", L"bad-file-name.jpg"}, }; -TEST_F(FileUtilICUTest, ReplaceIllegalCharactersTest) { - for (unsigned int i = 0; i < arraysize(kIllegalCharacterCases); ++i) { +TEST_F(FileUtilICUTest, ReplaceIllegalCharactersInPathTest) { + for (size_t i = 0; i < arraysize(kIllegalCharacterCases); ++i) { +#if defined(OS_WIN) std::wstring bad_name(kIllegalCharacterCases[i].bad_name); - file_util::ReplaceIllegalCharacters(&bad_name, L'-'); + file_util::ReplaceIllegalCharactersInPath(&bad_name, '-'); EXPECT_EQ(kIllegalCharacterCases[i].good_name, bad_name); +#elif defined(OS_MACOSX) + std::string bad_name(WideToUTF8(kIllegalCharacterCases[i].bad_name)); + file_util::ReplaceIllegalCharactersInPath(&bad_name, '-'); + EXPECT_EQ(WideToUTF8(kIllegalCharacterCases[i].good_name), bad_name); +#endif } } +#endif + |