diff options
author | asanka@chromium.org <asanka@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-08-20 15:51:21 +0000 |
---|---|---|
committer | asanka@chromium.org <asanka@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2011-08-20 15:51:21 +0000 |
commit | 45f9adb0aebb394594a949e3312901219b62ac04 (patch) | |
tree | 44487c6bef136adcbdd50cef49bbfe498694a25f /net/base | |
parent | 8f7f1b4d437ca642b0eb09c8dd148d1fb5585eb1 (diff) | |
download | chromium_src-45f9adb0aebb394594a949e3312901219b62ac04.zip chromium_src-45f9adb0aebb394594a949e3312901219b62ac04.tar.gz chromium_src-45f9adb0aebb394594a949e3312901219b62ac04.tar.bz2 |
Call GenerateSafeFileName() from GetSuggestedFilename().
GetSuggestedFilename() would then become the only function that needs to be called to generate downloaded filenames.
BUG=78085
TEST=net_unittests --gtest_filter=*Generate*FileName
Review URL: http://codereview.chromium.org/7607013
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@97581 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'net/base')
-rw-r--r-- | net/base/net_util.cc | 172 | ||||
-rw-r--r-- | net/base/net_util.h | 45 | ||||
-rw-r--r-- | net/base/net_util_unittest.cc | 7 |
3 files changed, 118 insertions, 106 deletions
diff --git a/net/base/net_util.cc b/net/base/net_util.cc index ef8eb53..0be8daa 100644 --- a/net/base/net_util.cc +++ b/net/base/net_util.cc @@ -922,6 +922,52 @@ char* do_strdup(const char* src) { #endif } +void SanitizeGeneratedFileName(std::string& filename) { + if (!filename.empty()) { + // Remove "." from the beginning and end of the file name to avoid tricks + // with hidden files, "..", and "." + TrimString(filename, ".", &filename); +#if defined(OS_WIN) + // Handle CreateFile() stripping trailing dots and spaces on filenames + // http://support.microsoft.com/kb/115827 + std::string::size_type pos = filename.find_last_not_of(" ."); + if (pos == std::string::npos) + filename.resize(0); + else + filename.resize(++pos); +#endif + // Replace any path information by changing path separators with + // underscores. + ReplaceSubstringsAfterOffset(&filename, 0, "/", "_"); + ReplaceSubstringsAfterOffset(&filename, 0, "\\", "_"); + } +} + +std::string GetFileNameFromURL(const GURL& url, + const std::string& referrer_charset) { + // about: and data: URLs don't have file names, but esp. data: URLs may + // contain parts that look like ones (i.e., contain a slash). Therefore we + // don't attempt to divine a file name out of them. + if (!url.is_valid() || url.SchemeIs("about") || url.SchemeIs("data")) + return std::string(); + + const std::string unescaped_url_filename = UnescapeURLComponent( + url.ExtractFileName(), + UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS); + + // The URL's path should be escaped UTF-8, but may not be. + std::string decoded_filename = unescaped_url_filename; + if (!IsStringASCII(decoded_filename)) { + bool ignore; + // TODO(jshin): this is probably not robust enough. To be sure, we need + // encoding detection. 
+ DecodeWord(unescaped_url_filename, referrer_charset, &ignore, + &decoded_filename); + } + + return decoded_filename; +} + #if defined(OS_WIN) // Returns whether the specified extension is automatically integrated into the // windows shell. @@ -1379,99 +1425,57 @@ string16 GetSuggestedFilename(const GURL& url, const std::string& content_disposition, const std::string& referrer_charset, const std::string& suggested_name, + const std::string& mime_type, const string16& default_name) { // TODO: this function to be updated to match the httpbis recommendations. // Talk to abarth for the latest news. // We don't translate this fallback string, "download". If localization is - // needed, the caller should provide localized fallback default_name. + // needed, the caller should provide localized fallback in |default_name|. static const char* kFinalFallbackName = "download"; + std::string filename; // In UTF-8 - std::string filename; - - // Try to extract from content-disposition first. + // Try to extract a filename from content-disposition first. if (!content_disposition.empty()) filename = GetFileNameFromCD(content_disposition, referrer_charset); - // Then try to use suggested name. + // Then try to use the suggested name. if (filename.empty() && !suggested_name.empty()) filename = suggested_name; - if (!filename.empty()) { - // Replace any path information the server may have sent, by changing - // path separators with underscores. - ReplaceSubstringsAfterOffset(&filename, 0, "/", "_"); - ReplaceSubstringsAfterOffset(&filename, 0, "\\", "_"); - - // Next, remove "." from the beginning and end of the file name to avoid - // tricks with hidden files, "..", and "." - TrimString(filename, ".", &filename); - } - - if (filename.empty()) { - // about: and data: URLs don't have file names, but esp. data: URLs may - // contain parts that look like ones (i.e., contain a slash). - // Therefore we don't attempt to divine a file name out of them. 
- if (url.SchemeIs("about") || url.SchemeIs("data")) { - return default_name.empty() ? ASCIIToUTF16(kFinalFallbackName) - : default_name; - } - - if (url.is_valid()) { - const std::string unescaped_url_filename = UnescapeURLComponent( - url.ExtractFileName(), - UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS); - - // The URL's path should be escaped UTF-8, but may not be. - std::string decoded_filename = unescaped_url_filename; - if (!IsStringASCII(decoded_filename)) { - bool ignore; - // TODO(jshin): this is probably not robust enough. To be sure, we - // need encoding detection. - DecodeWord(unescaped_url_filename, referrer_charset, &ignore, - &decoded_filename); - } + // Now try extracting the filename from the URL. GetFileNameFromURL() only + // looks at the last component of the URL and doesn't return the hostname as a + // failover. + if (filename.empty()) + filename = GetFileNameFromURL(url, referrer_charset); - filename = decoded_filename; - } + // Finally try the URL hostname, but only if there's no default specified in + // |default_name|. Some schemes (e.g.: file:, about:, data:) do not have a + // host name. + if (filename.empty() && default_name.empty() && + url.is_valid() && !url.host().empty()) { + // TODO(jungshik) : Decode a 'punycoded' IDN hostname. (bug 1264451) + filename = url.host(); } -#if defined(OS_WIN) - { // Handle CreateFile() stripping trailing dots and spaces on filenames - // http://support.microsoft.com/kb/115827 - std::string::size_type pos = filename.find_last_not_of(" ."); - if (pos == std::string::npos) - filename.resize(0); - else - filename.resize(++pos); - } -#endif - // Trim '.' once more. - TrimString(filename, ".", &filename); - - // If there's no filename or it gets trimed to be empty, use - // the URL hostname or default_name - if (filename.empty()) { - if (!default_name.empty()) { - return default_name; - } else if (url.is_valid()) { - // Some schemes (e.g. file) do not have a hostname. 
Even though it's - // not likely to reach here, let's hardcode the last fallback name. - // TODO(jungshik) : Decode a 'punycoded' IDN hostname. (bug 1264451) - filename = url.host().empty() ? kFinalFallbackName : url.host(); - } else { - NOTREACHED(); - } - } + SanitizeGeneratedFileName(filename); + // Sanitization can cause the filename to disappear (e.g.: if the filename + // consisted entirely of spaces and '.'s), in which case we use the default. + if (filename.empty() && default_name.empty()) + filename = kFinalFallbackName; #if defined(OS_WIN) - string16 path = UTF8ToUTF16(filename); + string16 path = (filename.empty())? default_name : UTF8ToUTF16(filename); file_util::ReplaceIllegalCharactersInPath(&path, '-'); - return path; + FilePath result(path); + GenerateSafeFileName(mime_type, &result); + return result.value(); #else - std::string path = filename; + std::string path = (filename.empty())? UTF16ToUTF8(default_name) : filename; file_util::ReplaceIllegalCharactersInPath(&path, '-'); - return UTF8ToUTF16(path); + FilePath result(path); + GenerateSafeFileName(mime_type, &result); + return UTF8ToUTF16(result.value()); #endif } @@ -1481,26 +1485,20 @@ FilePath GenerateFileName(const GURL& url, const std::string& suggested_name, const std::string& mime_type, const string16& default_file_name) { - string16 new_name = GetSuggestedFilename(GURL(url), - content_disposition, - referrer_charset, - suggested_name, - default_file_name); - - // TODO(evan): this code is totally wrong -- we should just generate - // Unicode filenames and do all this encoding switching at the end. - // However, I'm just shuffling wrong code around, at least not adding - // to it. 
+ string16 file_name = GetSuggestedFilename(url, + content_disposition, + referrer_charset, + suggested_name, + mime_type, + default_file_name); + #if defined(OS_WIN) - FilePath generated_name = FilePath(new_name); + FilePath generated_name(file_name); #else - FilePath generated_name = FilePath( - base::SysWideToNativeMB(UTF16ToWide(new_name))); + FilePath generated_name(base::SysWideToNativeMB(UTF16ToWide(file_name))); #endif - DCHECK(!generated_name.empty()); - GenerateSafeFileName(mime_type, &generated_name); return generated_name; } diff --git a/net/base/net_util.h b/net/base/net_util.h index 76e49f6..8ccfeef 100644 --- a/net/base/net_util.h +++ b/net/base/net_util.h @@ -243,27 +243,42 @@ NET_EXPORT std::string GetDirectoryListingEntry(const string16& name, // unmodified. NET_EXPORT string16 StripWWW(const string16& text); -// Gets the filename in the following order: -// 1) the raw Content-Disposition header (as read from the network). -// |referrer_charset| is used as one of charsets to interpret a raw 8bit -// string in C-D header (after interpreting as UTF-8 fails). -// See the comment for GetFilenameFromCD for more details. -// 2) the suggested name -// 3) the last path component name or hostname from |url| -// 4) the given |default_name| -// 5) the hard-coded name "download", as the last resort +// Generates a filename using the first successful method from the following (in +// order): +// +// 1) The raw Content-Disposition header in |content_disposition| (as read from +// the network. |referrer_charset| is used as described in the comment for +// GetFileNameFromCD(). +// 2) |suggested_name| if specified. |suggested_name| is assumed to be in +// UTF-8. +// 3) The filename extracted from the |url|. |referrer_charset| will be used to +// interpret the URL if there are non-ascii characters. +// 4) |default_name|. If non-empty, |default_name| is assumed to be a filename +// and shouldn't contain a path. 
|default_name| is not subject to validation +// or sanitization, and therefore shouldn't be a user supplied string. +// 5) The hostname portion from the |url| +// +// Then, leading and trailing '.'s will be removed. On Windows, trailing spaces +// are also removed. The string "download" is the final fallback if no filename +// is found or the filename is empty. +// +// Any illegal characters in the filename will be replaced by '-'. If the +// filename doesn't contain an extension, and a |mime_type| is specified, the +// preferred extension for the |mime_type| will be appended to the filename. +// The resulting filename is then checked against a list of reserved names on +// Windows. If the name is reserved, an underscore will be prepended to the +// filename. +// +// Note: |mime_type| should only be specified if this function is called from a +// thread that allows IO. NET_EXPORT string16 GetSuggestedFilename(const GURL& url, const std::string& content_disposition, const std::string& referrer_charset, const std::string& suggested_name, + const std::string& mime_type, const string16& default_name); -// Generate a filename based on a HTTP request. -// -// The |url|, |content_disposition|, |referrer_charset|, |suggested_name|, and -// |default_name| parameters will be used with GetSuggestedFilename() to -// generate a filename. The resulting filename will be passed in along with the -// |mime_type| to GenerateSafeFileName() to generate the returned filename. +// Similar to GetSuggestedFilename(), but returns a FilePath. NET_EXPORT FilePath GenerateFileName(const GURL& url, const std::string& content_disposition, const std::string& referrer_charset, diff --git a/net/base/net_util_unittest.cc b/net/base/net_util_unittest.cc index 4add983..d8879fe 100644 --- a/net/base/net_util_unittest.cc +++ b/net/base/net_util_unittest.cc @@ -1407,15 +1407,14 @@ TEST(NetUtilTest, GenerateFileName) { L"default", L"default" }, - { // C-D specified filename disappears. 
Should use last component of URL - // instead. + { // C-D specified filename disappears. Failover to final filename. "http://www.google.com/test.html", "Content-disposition: attachment; filename=\"..\"", "", "", "", - L"download", - L"test.html" + L"default", + L"default" }, // Below is a small subset of cases taken from GetFileNameFromCD test above. { |