summary | refs | log | tree | commit | diff | stats
path: root/net/base
diff options
context:
space:
mode:
author asanka@chromium.org <asanka@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2011-08-20 15:51:21 +0000
committer asanka@chromium.org <asanka@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2011-08-20 15:51:21 +0000
commit 45f9adb0aebb394594a949e3312901219b62ac04 (patch)
tree 44487c6bef136adcbdd50cef49bbfe498694a25f /net/base
parent 8f7f1b4d437ca642b0eb09c8dd148d1fb5585eb1 (diff)
download chromium_src-45f9adb0aebb394594a949e3312901219b62ac04.zip
chromium_src-45f9adb0aebb394594a949e3312901219b62ac04.tar.gz
chromium_src-45f9adb0aebb394594a949e3312901219b62ac04.tar.bz2
Call GenerateSafeFilename() from GetSuggestedFilename().
GetSuggestedFilename() would then become the only function that needs to be called to generate downloaded filenames. BUG=78085 TEST=net_unittests --gtest_filter=*Generate*FileName Review URL: http://codereview.chromium.org/7607013 git-svn-id: svn://svn.chromium.org/chrome/trunk/src@97581 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'net/base')
-rw-r--r-- net/base/net_util.cc 172
-rw-r--r-- net/base/net_util.h 45
-rw-r--r-- net/base/net_util_unittest.cc 7
3 files changed, 118 insertions, 106 deletions
diff --git a/net/base/net_util.cc b/net/base/net_util.cc
index ef8eb53..0be8daa 100644
--- a/net/base/net_util.cc
+++ b/net/base/net_util.cc
@@ -922,6 +922,52 @@ char* do_strdup(const char* src) {
#endif
}
+void SanitizeGeneratedFileName(std::string& filename) {
+ if (!filename.empty()) {
+ // Remove "." from the beginning and end of the file name to avoid tricks
+ // with hidden files, "..", and "."
+ TrimString(filename, ".", &filename);
+#if defined(OS_WIN)
+ // Handle CreateFile() stripping trailing dots and spaces on filenames
+ // http://support.microsoft.com/kb/115827
+ std::string::size_type pos = filename.find_last_not_of(" .");
+ if (pos == std::string::npos)
+ filename.resize(0);
+ else
+ filename.resize(++pos);
+#endif
+ // Replace any path information by changing path separators with
+ // underscores.
+ ReplaceSubstringsAfterOffset(&filename, 0, "/", "_");
+ ReplaceSubstringsAfterOffset(&filename, 0, "\\", "_");
+ }
+}
+
+std::string GetFileNameFromURL(const GURL& url,
+ const std::string& referrer_charset) {
+ // about: and data: URLs don't have file names, but esp. data: URLs may
+ // contain parts that look like ones (i.e., contain a slash). Therefore we
+ // don't attempt to divine a file name out of them.
+ if (!url.is_valid() || url.SchemeIs("about") || url.SchemeIs("data"))
+ return std::string();
+
+ const std::string unescaped_url_filename = UnescapeURLComponent(
+ url.ExtractFileName(),
+ UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS);
+
+ // The URL's path should be escaped UTF-8, but may not be.
+ std::string decoded_filename = unescaped_url_filename;
+ if (!IsStringASCII(decoded_filename)) {
+ bool ignore;
+ // TODO(jshin): this is probably not robust enough. To be sure, we need
+ // encoding detection.
+ DecodeWord(unescaped_url_filename, referrer_charset, &ignore,
+ &decoded_filename);
+ }
+
+ return decoded_filename;
+}
+
#if defined(OS_WIN)
// Returns whether the specified extension is automatically integrated into the
// windows shell.
@@ -1379,99 +1425,57 @@ string16 GetSuggestedFilename(const GURL& url,
const std::string& content_disposition,
const std::string& referrer_charset,
const std::string& suggested_name,
+ const std::string& mime_type,
const string16& default_name) {
// TODO: this function to be updated to match the httpbis recommendations.
// Talk to abarth for the latest news.
// We don't translate this fallback string, "download". If localization is
- // needed, the caller should provide localized fallback default_name.
+ // needed, the caller should provide localized fallback in |default_name|.
static const char* kFinalFallbackName = "download";
+ std::string filename; // In UTF-8
- std::string filename;
-
- // Try to extract from content-disposition first.
+ // Try to extract a filename from content-disposition first.
if (!content_disposition.empty())
filename = GetFileNameFromCD(content_disposition, referrer_charset);
- // Then try to use suggested name.
+ // Then try to use the suggested name.
if (filename.empty() && !suggested_name.empty())
filename = suggested_name;
- if (!filename.empty()) {
- // Replace any path information the server may have sent, by changing
- // path separators with underscores.
- ReplaceSubstringsAfterOffset(&filename, 0, "/", "_");
- ReplaceSubstringsAfterOffset(&filename, 0, "\\", "_");
-
- // Next, remove "." from the beginning and end of the file name to avoid
- // tricks with hidden files, "..", and "."
- TrimString(filename, ".", &filename);
- }
-
- if (filename.empty()) {
- // about: and data: URLs don't have file names, but esp. data: URLs may
- // contain parts that look like ones (i.e., contain a slash).
- // Therefore we don't attempt to divine a file name out of them.
- if (url.SchemeIs("about") || url.SchemeIs("data")) {
- return default_name.empty() ? ASCIIToUTF16(kFinalFallbackName)
- : default_name;
- }
-
- if (url.is_valid()) {
- const std::string unescaped_url_filename = UnescapeURLComponent(
- url.ExtractFileName(),
- UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS);
-
- // The URL's path should be escaped UTF-8, but may not be.
- std::string decoded_filename = unescaped_url_filename;
- if (!IsStringASCII(decoded_filename)) {
- bool ignore;
- // TODO(jshin): this is probably not robust enough. To be sure, we
- // need encoding detection.
- DecodeWord(unescaped_url_filename, referrer_charset, &ignore,
- &decoded_filename);
- }
+ // Now try extracting the filename from the URL. GetFileNameFromURL() only
+ // looks at the last component of the URL and doesn't return the hostname as a
+ // failover.
+ if (filename.empty())
+ filename = GetFileNameFromURL(url, referrer_charset);
- filename = decoded_filename;
- }
+ // Finally try the URL hostname, but only if there's no default specified in
+ // |default_name|. Some schemes (e.g.: file:, about:, data:) do not have a
+ // host name.
+ if (filename.empty() && default_name.empty() &&
+ url.is_valid() && !url.host().empty()) {
+ // TODO(jungshik) : Decode a 'punycoded' IDN hostname. (bug 1264451)
+ filename = url.host();
}
-#if defined(OS_WIN)
- { // Handle CreateFile() stripping trailing dots and spaces on filenames
- // http://support.microsoft.com/kb/115827
- std::string::size_type pos = filename.find_last_not_of(" .");
- if (pos == std::string::npos)
- filename.resize(0);
- else
- filename.resize(++pos);
- }
-#endif
- // Trim '.' once more.
- TrimString(filename, ".", &filename);
-
- // If there's no filename or it gets trimed to be empty, use
- // the URL hostname or default_name
- if (filename.empty()) {
- if (!default_name.empty()) {
- return default_name;
- } else if (url.is_valid()) {
- // Some schemes (e.g. file) do not have a hostname. Even though it's
- // not likely to reach here, let's hardcode the last fallback name.
- // TODO(jungshik) : Decode a 'punycoded' IDN hostname. (bug 1264451)
- filename = url.host().empty() ? kFinalFallbackName : url.host();
- } else {
- NOTREACHED();
- }
- }
+ SanitizeGeneratedFileName(filename);
+ // Sanitization can cause the filename to disappear (e.g.: if the filename
+ // consisted entirely of spaces and '.'s), in which case we use the default.
+ if (filename.empty() && default_name.empty())
+ filename = kFinalFallbackName;
#if defined(OS_WIN)
- string16 path = UTF8ToUTF16(filename);
+ string16 path = (filename.empty())? default_name : UTF8ToUTF16(filename);
file_util::ReplaceIllegalCharactersInPath(&path, '-');
- return path;
+ FilePath result(path);
+ GenerateSafeFileName(mime_type, &result);
+ return result.value();
#else
- std::string path = filename;
+ std::string path = (filename.empty())? UTF16ToUTF8(default_name) : filename;
file_util::ReplaceIllegalCharactersInPath(&path, '-');
- return UTF8ToUTF16(path);
+ FilePath result(path);
+ GenerateSafeFileName(mime_type, &result);
+ return UTF8ToUTF16(result.value());
#endif
}
@@ -1481,26 +1485,20 @@ FilePath GenerateFileName(const GURL& url,
const std::string& suggested_name,
const std::string& mime_type,
const string16& default_file_name) {
- string16 new_name = GetSuggestedFilename(GURL(url),
- content_disposition,
- referrer_charset,
- suggested_name,
- default_file_name);
-
- // TODO(evan): this code is totally wrong -- we should just generate
- // Unicode filenames and do all this encoding switching at the end.
- // However, I'm just shuffling wrong code around, at least not adding
- // to it.
+ string16 file_name = GetSuggestedFilename(url,
+ content_disposition,
+ referrer_charset,
+ suggested_name,
+ mime_type,
+ default_file_name);
+
#if defined(OS_WIN)
- FilePath generated_name = FilePath(new_name);
+ FilePath generated_name(file_name);
#else
- FilePath generated_name = FilePath(
- base::SysWideToNativeMB(UTF16ToWide(new_name)));
+ FilePath generated_name(base::SysWideToNativeMB(UTF16ToWide(file_name)));
#endif
-
DCHECK(!generated_name.empty());
- GenerateSafeFileName(mime_type, &generated_name);
return generated_name;
}
diff --git a/net/base/net_util.h b/net/base/net_util.h
index 76e49f6..8ccfeef 100644
--- a/net/base/net_util.h
+++ b/net/base/net_util.h
@@ -243,27 +243,42 @@ NET_EXPORT std::string GetDirectoryListingEntry(const string16& name,
// unmodified.
NET_EXPORT string16 StripWWW(const string16& text);
-// Gets the filename in the following order:
-// 1) the raw Content-Disposition header (as read from the network).
-// |referrer_charset| is used as one of charsets to interpret a raw 8bit
-// string in C-D header (after interpreting as UTF-8 fails).
-// See the comment for GetFilenameFromCD for more details.
-// 2) the suggested name
-// 3) the last path component name or hostname from |url|
-// 4) the given |default_name|
-// 5) the hard-coded name "download", as the last resort
+// Generates a filename using the first successful method from the following (in
+// order):
+//
+// 1) The raw Content-Disposition header in |content_disposition| (as read from
+// the network). |referrer_charset| is used as described in the comment for
+// GetFileNameFromCD().
+// 2) |suggested_name| if specified. |suggested_name| is assumed to be in
+// UTF-8.
+// 3) The filename extracted from the |url|. |referrer_charset| will be used to
+// interpret the URL if there are non-ascii characters.
+// 4) |default_name|. If non-empty, |default_name| is assumed to be a filename
+// and shouldn't contain a path. |default_name| is not subject to validation
+// or sanitization, and therefore shouldn't be a user supplied string.
+// 5) The hostname portion from the |url|
+//
+// Then, leading and trailing '.'s will be removed. On Windows, trailing spaces
+// are also removed. The string "download" is the final fallback if no filename
+// is found or the filename is empty.
+//
+// Any illegal characters in the filename will be replaced by '-'. If the
+// filename doesn't contain an extension, and a |mime_type| is specified, the
+// preferred extension for the |mime_type| will be appended to the filename.
+// The resulting filename is then checked against a list of reserved names on
+// Windows. If the name is reserved, an underscore will be prepended to the
+// filename.
+//
+// Note: |mime_type| should only be specified if this function is called from a
+// thread that allows IO.
NET_EXPORT string16 GetSuggestedFilename(const GURL& url,
const std::string& content_disposition,
const std::string& referrer_charset,
const std::string& suggested_name,
+ const std::string& mime_type,
const string16& default_name);
-// Generate a filename based on a HTTP request.
-//
-// The |url|, |content_disposition|, |referrer_charset|, |suggested_name|, and
-// |default_name| parameters will be used with GetSuggestedFilename() to
-// generate a filename. The resulting filename will be passed in along with the
-// |mime_type| to GenerateSafeFileName() to generate the returned filename.
+// Similar to GetSuggestedFilename(), but returns a FilePath.
NET_EXPORT FilePath GenerateFileName(const GURL& url,
const std::string& content_disposition,
const std::string& referrer_charset,
diff --git a/net/base/net_util_unittest.cc b/net/base/net_util_unittest.cc
index 4add983..d8879fe 100644
--- a/net/base/net_util_unittest.cc
+++ b/net/base/net_util_unittest.cc
@@ -1407,15 +1407,14 @@ TEST(NetUtilTest, GenerateFileName) {
L"default",
L"default"
},
- { // C-D specified filename disappears. Should use last component of URL
- // instead.
+ { // C-D specified filename disappears. Fall back to the default filename.
"http://www.google.com/test.html",
"Content-disposition: attachment; filename=\"..\"",
"",
"",
"",
- L"download",
- L"test.html"
+ L"default",
+ L"default"
},
// Below is a small subset of cases taken from GetFileNameFromCD test above.
{