diff options
author | estade@chromium.org <estade@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-07-27 21:24:39 +0000 |
---|---|---|
committer | estade@chromium.org <estade@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2010-07-27 21:24:39 +0000 |
commit | 3432291b5b66e9bea09537c3bfbc4d80f27442a4 (patch) | |
tree | 26b6a4c09bcebc35931e1f92abbfbacabea2fa13 /base | |
parent | 87351794d9614198c266f7380c38a7327eb50056 (diff) | |
download | chromium_src-3432291b5b66e9bea09537c3bfbc4d80f27442a4.zip chromium_src-3432291b5b66e9bea09537c3bfbc4d80f27442a4.tar.gz chromium_src-3432291b5b66e9bea09537c3bfbc4d80f27442a4.tar.bz2 |
Treat multiple extensions like .tar.gz as a single extension.
The logic is taken from Firefox.
BUG=48346
TEST=unit tests; downloading the same .tar.gz file multiple times (see bug)
Review URL: http://codereview.chromium.org/3018011
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@53844 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'base')
-rw-r--r-- | base/file_path.cc | 152 | ||||
-rw-r--r-- | base/file_path_unittest.cc | 47 |
2 files changed, 139 insertions, 60 deletions
diff --git a/base/file_path.cc b/base/file_path.cc index 1787a69..dd80eab 100644 --- a/base/file_path.cc +++ b/base/file_path.cc @@ -36,16 +36,18 @@ const FilePath::CharType FilePath::kParentDirectory[] = FILE_PATH_LITERAL(".."); const FilePath::CharType FilePath::kExtensionSeparator = FILE_PATH_LITERAL('.'); +typedef FilePath::StringType StringType; namespace { +const char* kCommonDoubleExtensions[] = { "gz", "z", "bz2" }; + // If this FilePath contains a drive letter specification, returns the // position of the last character of the drive letter specification, // otherwise returns npos. This can only be true on Windows, when a pathname // begins with a letter followed by a colon. On other platforms, this always // returns npos. -FilePath::StringType::size_type FindDriveLetter( - const FilePath::StringType& path) { +StringType::size_type FindDriveLetter(const StringType& path) { #if defined(FILE_PATH_USES_DRIVE_LETTERS) // This is dependent on an ASCII-based character set, but that's a // reasonable assumption. iswalpha can be too inclusive here. 
@@ -55,35 +57,33 @@ FilePath::StringType::size_type FindDriveLetter( return 1; } #endif // FILE_PATH_USES_DRIVE_LETTERS - return FilePath::StringType::npos; + return StringType::npos; } - #if defined(FILE_PATH_USES_DRIVE_LETTERS) -bool EqualDriveLetterCaseInsensitive(const FilePath::StringType a, - const FilePath::StringType b) { +bool EqualDriveLetterCaseInsensitive(const StringType a, + const StringType b) { size_t a_letter_pos = FindDriveLetter(a); size_t b_letter_pos = FindDriveLetter(b); - if ((a_letter_pos == FilePath::StringType::npos) || - (b_letter_pos == FilePath::StringType::npos)) + if (a_letter_pos == StringType::npos || b_letter_pos == StringType::npos) return a == b; - FilePath::StringType a_letter(a.substr(0, a_letter_pos + 1)); - FilePath::StringType b_letter(b.substr(0, b_letter_pos + 1)); + StringType a_letter(a.substr(0, a_letter_pos + 1)); + StringType b_letter(b.substr(0, b_letter_pos + 1)); if (!StartsWith(a_letter, b_letter, false)) return false; - FilePath::StringType a_rest(a.substr(a_letter_pos + 1)); - FilePath::StringType b_rest(b.substr(b_letter_pos + 1)); + StringType a_rest(a.substr(a_letter_pos + 1)); + StringType b_rest(b.substr(b_letter_pos + 1)); return a_rest == b_rest; } #endif // defined(FILE_PATH_USES_DRIVE_LETTERS) -bool IsPathAbsolute(const FilePath::StringType& path) { +bool IsPathAbsolute(const StringType& path) { #if defined(FILE_PATH_USES_DRIVE_LETTERS) - FilePath::StringType::size_type letter = FindDriveLetter(path); - if (letter != FilePath::StringType::npos) { + StringType::size_type letter = FindDriveLetter(path); + if (letter != StringType::npos) { // Look for a separator right after the drive specification. 
return path.length() > letter + 1 && FilePath::IsSeparator(path[letter + 1]); @@ -97,8 +97,8 @@ bool IsPathAbsolute(const FilePath::StringType& path) { #endif // FILE_PATH_USES_DRIVE_LETTERS } -bool AreAllSeparators(const FilePath::StringType& input) { - for (FilePath::StringType::const_iterator it = input.begin(); +bool AreAllSeparators(const StringType& input) { + for (StringType::const_iterator it = input.begin(); it != input.end(); ++it) { if (!FilePath::IsSeparator(*it)) return false; @@ -107,6 +107,54 @@ bool AreAllSeparators(const FilePath::StringType& input) { return true; } +// Find the position of the '.' that separates the extension from the rest +// of the file name. The position is relative to BaseName(), not value(). +// This allows a second extension component of up to 4 characters when the +// rightmost extension component is a common double extension (gz, bz2, Z). +// For example, foo.tar.gz or foo.tar.Z would have extension components of +// '.tar.gz' and '.tar.Z' respectively. Returns npos if it can't find an +// extension. +StringType::size_type ExtensionSeparatorPosition(const StringType& path) { + // Special case "." and ".." + if (path == FilePath::kCurrentDirectory || path == FilePath::kParentDirectory) + return StringType::npos; + + const StringType::size_type last_dot = + path.rfind(FilePath::kExtensionSeparator); + + // No extension, or the extension is the whole filename. + if (last_dot == StringType::npos || last_dot == 0U) + return last_dot; + + // Special case .<extension1>.<extension2>, but only if the final extension + // is one of a few common double extensions. 
+ StringType extension(path, last_dot + 1); + bool is_common_double_extension = false; + for (size_t i = 0; i < arraysize(kCommonDoubleExtensions); ++i) { + if (LowerCaseEqualsASCII(extension, kCommonDoubleExtensions[i])) + is_common_double_extension = true; + } + if (!is_common_double_extension) + return last_dot; + + // Check that <extension1> is 1-4 characters, otherwise fall back to + // <extension2>. + const StringType::size_type penultimate_dot = + path.rfind(FilePath::kExtensionSeparator, last_dot - 1); + const StringType::size_type last_separator = + path.find_last_of(FilePath::kSeparators, last_dot - 1, + arraysize(FilePath::kSeparators) - 1); + if (penultimate_dot != StringType::npos && + (last_separator == StringType::npos || + penultimate_dot > last_separator) && + last_dot - penultimate_dot <= 5U && + last_dot - penultimate_dot > 1U) { + return penultimate_dot; + } + + return last_dot; +} + } // namespace FilePath::FilePath() { @@ -136,8 +184,7 @@ bool FilePath::IsSeparator(CharType character) { return false; } -void FilePath::GetComponents(std::vector<FilePath::StringType>* components) - const { +void FilePath::GetComponents(std::vector<StringType>* components) const { DCHECK(components); if (!components) return; @@ -145,7 +192,7 @@ void FilePath::GetComponents(std::vector<FilePath::StringType>* components) if (value().empty()) return; - std::vector<FilePath::StringType> ret_val; + std::vector<StringType> ret_val; FilePath current = *this; FilePath base; @@ -165,12 +212,11 @@ void FilePath::GetComponents(std::vector<FilePath::StringType>* components) // Capture drive letter, if any. 
FilePath dir = current.DirName(); StringType::size_type letter = FindDriveLetter(dir.value()); - if (letter != FilePath::StringType::npos) { - ret_val.push_back(FilePath::StringType(dir.value(), 0, letter + 1)); + if (letter != StringType::npos) { + ret_val.push_back(StringType(dir.value(), 0, letter + 1)); } - *components = std::vector<FilePath::StringType>(ret_val.rbegin(), - ret_val.rend()); + *components = std::vector<StringType>(ret_val.rbegin(), ret_val.rend()); } bool FilePath::operator==(const FilePath& that) const { @@ -195,8 +241,8 @@ bool FilePath::IsParent(const FilePath& child) const { bool FilePath::AppendRelativePath(const FilePath& child, FilePath* path) const { - std::vector<FilePath::StringType> parent_components; - std::vector<FilePath::StringType> child_components; + std::vector<StringType> parent_components; + std::vector<StringType> child_components; GetComponents(&parent_components); child.GetComponents(&child_components); @@ -205,17 +251,17 @@ bool FilePath::AppendRelativePath(const FilePath& child, if (parent_components.size() == 0) return false; - std::vector<FilePath::StringType>::const_iterator parent_comp = + std::vector<StringType>::const_iterator parent_comp = parent_components.begin(); - std::vector<FilePath::StringType>::const_iterator child_comp = + std::vector<StringType>::const_iterator child_comp = child_components.begin(); #if defined(FILE_PATH_USES_DRIVE_LETTERS) // Windows can access case sensitive filesystems, so component // comparisions must be case sensitive, but drive letters are // never case sensitive. 
- if ((FindDriveLetter(*parent_comp) != FilePath::StringType::npos) && - (FindDriveLetter(*child_comp) != FilePath::StringType::npos)) { + if ((FindDriveLetter(*parent_comp) != StringType::npos) && + (FindDriveLetter(*child_comp) != StringType::npos)) { if (!StartsWith(*parent_comp, *child_comp, false)) return false; ++parent_comp; @@ -301,30 +347,24 @@ FilePath FilePath::BaseName() const { return new_path; } -FilePath::StringType FilePath::Extension() const { - // BaseName() calls StripTrailingSeparators, so cases like /foo.baz/// work. - StringType base = BaseName().value(); - - // Special case "." and ".." - if (base == kCurrentDirectory || base == kParentDirectory) +StringType FilePath::Extension() const { + FilePath base(BaseName()); + const StringType::size_type dot = ExtensionSeparatorPosition(base.path_); + if (dot == StringType::npos) return StringType(); - const StringType::size_type last_dot = base.rfind(kExtensionSeparator); - if (last_dot == StringType::npos) - return StringType(); - return StringType(base, last_dot); + return base.path_.substr(dot, StringType::npos); } FilePath FilePath::RemoveExtension() const { - StringType ext = Extension(); - // It's important to check Extension() since that verifies that the - // kExtensionSeparator actually appeared in the last path component. - if (ext.empty()) - return FilePath(path_); - // Since Extension() verified that the extension is in fact in the last path - // component, this substr will effectively strip trailing separators. 
- const StringType::size_type last_dot = path_.rfind(kExtensionSeparator); - return FilePath(path_.substr(0, last_dot)); + if (Extension().empty()) + return *this; + + const StringType::size_type dot = ExtensionSeparatorPosition(path_); + if (dot == StringType::npos) + return *this; + + return FilePath(path_.substr(0, dot)); } FilePath FilePath::InsertBeforeExtension(const StringType& suffix) const { @@ -390,7 +430,7 @@ FilePath FilePath::ReplaceExtension(const StringType& extension) const { bool FilePath::MatchesExtension(const StringType& extension) const { DCHECK(extension.empty() || extension[0] == kExtensionSeparator); - FilePath::StringType current_extension = Extension(); + StringType current_extension = Extension(); if (current_extension.length() != extension.length()) return false; @@ -950,7 +990,7 @@ int FilePath::HFSFastUnicodeCompare(const StringType& string1, } } -FilePath::StringType FilePath::GetHFSDecomposedForm(const StringType& string) { +StringType FilePath::GetHFSDecomposedForm(const StringType& string) { scoped_cftyperef<CFStringRef> cfstring( CFStringCreateWithBytesNoCopy( NULL, @@ -1071,7 +1111,7 @@ FilePath FilePath::StripTrailingSeparators() const { // static. void FilePath::WriteStringTypeToPickle(Pickle* pickle, - const FilePath::StringType& path) { + const StringType& path) { #if defined(WCHAR_T_IS_UTF16) pickle->WriteWString(path); #elif defined(WCHAR_T_IS_UTF32) @@ -1083,7 +1123,7 @@ void FilePath::WriteStringTypeToPickle(Pickle* pickle, // static. 
bool FilePath::ReadStringTypeFromPickle(Pickle* pickle, void** iter, - FilePath::StringType* path) { + StringType* path) { #if defined(WCHAR_T_IS_UTF16) if (!pickle->ReadWString(iter, path)) return false; @@ -1129,12 +1169,12 @@ void FilePath::StripTrailingSeparatorsInternal() { } bool FilePath::ReferencesParent() const { - std::vector<FilePath::StringType> components; + std::vector<StringType> components; GetComponents(&components); - std::vector<FilePath::StringType>::const_iterator it = components.begin(); + std::vector<StringType>::const_iterator it = components.begin(); for (; it != components.end(); ++it) { - const FilePath::StringType& component = *it; + const StringType& component = *it; if (component == kParentDirectory) return true; } diff --git a/base/file_path_unittest.cc b/base/file_path_unittest.cc index 828a642..48bf23c 100644 --- a/base/file_path_unittest.cc +++ b/base/file_path_unittest.cc @@ -698,16 +698,16 @@ TEST_F(FilePathTest, Extension) { FilePath base_dir(FILE_PATH_LITERAL("base_dir")); FilePath jpg = base_dir.Append(FILE_PATH_LITERAL("foo.jpg")); - EXPECT_EQ(jpg.Extension(), FILE_PATH_LITERAL(".jpg")); + EXPECT_EQ(FILE_PATH_LITERAL(".jpg"), jpg.Extension()); FilePath base = jpg.BaseName().RemoveExtension(); - EXPECT_EQ(base.value(), FILE_PATH_LITERAL("foo")); + EXPECT_EQ(FILE_PATH_LITERAL("foo"), base.value()); FilePath path_no_ext = base_dir.Append(base); - EXPECT_EQ(jpg.RemoveExtension().value(), path_no_ext.value()); + EXPECT_EQ(path_no_ext.value(), jpg.RemoveExtension().value()); EXPECT_EQ(path_no_ext.value(), path_no_ext.RemoveExtension().value()); - EXPECT_EQ(path_no_ext.Extension(), FILE_PATH_LITERAL("")); + EXPECT_EQ(FILE_PATH_LITERAL(""), path_no_ext.Extension()); } TEST_F(FilePathTest, Extension2) { @@ -730,6 +730,16 @@ TEST_F(FilePathTest, Extension2) { { FPL("/foo/bar/"), FPL("") }, { FPL("/foo/bar./"), FPL(".") }, { FPL("/foo/bar/baz.ext1.ext2"), FPL(".ext2") }, + { FPL("/foo.tar.gz"), FPL(".tar.gz") }, + { FPL("/foo.tar.Z"), 
FPL(".tar.Z") }, + { FPL("/foo.tar.bz2"), FPL(".tar.bz2") }, + { FPL("/subversion-1.6.12.zip"), FPL(".zip") }, + { FPL("/foo.1234.gz"), FPL(".1234.gz") }, + { FPL("/foo.12345.gz"), FPL(".gz") }, + { FPL("/foo..gz"), FPL(".gz") }, + { FPL("/foo.1234.tar.gz"), FPL(".tar.gz") }, + { FPL("/foo.tar.tar.gz"), FPL(".tar.gz") }, + { FPL("/foo.tar.gz.gz"), FPL(".gz.gz") }, { FPL("."), FPL("") }, { FPL(".."), FPL("") }, { FPL("./foo"), FPL("") }, @@ -815,6 +825,34 @@ TEST_F(FilePathTest, InsertBeforeExtension) { } } +TEST_F(FilePathTest, RemoveExtension) { + const struct UnaryTestData cases[] = { + { FPL(""), FPL("") }, + { FPL("."), FPL(".") }, + { FPL(".."), FPL("..") }, + { FPL("foo.dll"), FPL("foo") }, + { FPL("./foo.dll"), FPL("./foo") }, + { FPL("foo..dll"), FPL("foo.") }, + { FPL("foo"), FPL("foo") }, + { FPL("foo."), FPL("foo") }, + { FPL("foo.."), FPL("foo.") }, + { FPL("foo.baz.dll"), FPL("foo.baz") }, + { FPL("foo.tar.gz"), FPL("foo") }, +#if defined(FILE_PATH_USES_WIN_SEPARATORS) + { FPL("C:\\foo.bar\\foo"), FPL("C:\\foo.bar\\foo") }, + { FPL("C:\\foo.bar\\..\\\\"), FPL("C:\\foo.bar\\..\\\\") }, +#endif + { FPL("/foo.bar/foo"), FPL("/foo.bar/foo") }, + { FPL("/foo.bar/..////"), FPL("/foo.bar/..////") }, + }; + for (unsigned int i = 0; i < arraysize(cases); ++i) { + FilePath path(cases[i].input); + FilePath removed = path.RemoveExtension(); + EXPECT_EQ(cases[i].expected, removed.value()) << "i: " << i << + ", path: " << path.value(); + } +} + TEST_F(FilePathTest, ReplaceExtension) { const struct BinaryTestData cases[] = { { { FPL(""), FPL("") }, FPL("") }, @@ -823,6 +861,7 @@ TEST_F(FilePathTest, ReplaceExtension) { { { FPL(".."), FPL("txt") }, FPL("") }, { { FPL("."), FPL("") }, FPL("") }, { { FPL("foo.dll"), FPL("txt") }, FPL("foo.txt") }, + { { FPL("./foo.dll"), FPL("txt") }, FPL("./foo.txt") }, { { FPL("foo..dll"), FPL("txt") }, FPL("foo..txt") }, { { FPL("foo.dll"), FPL(".txt") }, FPL("foo.txt") }, { { FPL("foo"), FPL("txt") }, FPL("foo.txt") }, |