diff options
Diffstat (limited to 'net')
-rw-r--r-- | net/base/filename_util.cc | 506 | ||||
-rw-r--r-- | net/base/filename_util.h | 116 | ||||
-rw-r--r-- | net/base/filename_util_unittest.cc | 1652 | ||||
-rw-r--r-- | net/base/net_util.cc | 412 | ||||
-rw-r--r-- | net/base/net_util.h | 94 | ||||
-rw-r--r-- | net/base/net_util_posix.cc | 34 | ||||
-rw-r--r-- | net/base/net_util_unittest.cc | 1638 | ||||
-rw-r--r-- | net/base/net_util_win.cc | 53 | ||||
-rw-r--r-- | net/filter/filter.cc | 3 | ||||
-rw-r--r-- | net/net.gyp | 17 | ||||
-rw-r--r-- | net/proxy/proxy_script_fetcher_impl_unittest.cc | 2 | ||||
-rw-r--r-- | net/url_request/file_protocol_handler.cc | 2 | ||||
-rw-r--r-- | net/url_request/url_request_file_job.cc | 2 | ||||
-rw-r--r-- | net/url_request/url_request_unittest.cc | 1 |
14 files changed, 2239 insertions, 2293 deletions
diff --git a/net/base/filename_util.cc b/net/base/filename_util.cc deleted file mode 100644 index 82f0a03..0000000 --- a/net/base/filename_util.cc +++ /dev/null @@ -1,506 +0,0 @@ -// Copyright 2014 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "net/base/filename_util.h" - -#include "base/file_util.h" -#include "base/files/file_path.h" -#include "base/i18n/file_util_icu.h" -#include "base/i18n/icu_string_conversions.h" -#include "base/path_service.h" -#include "base/strings/string_util.h" -#include "base/strings/sys_string_conversions.h" -#include "base/strings/utf_string_conversions.h" -#include "base/threading/thread_restrictions.h" -#include "net/base/escape.h" -#include "net/base/mime_util.h" -#include "net/http/http_content_disposition.h" -#include "url/gurl.h" - -namespace net { - -namespace { - -// what we prepend to get a file URL -static const base::FilePath::CharType kFileURLPrefix[] = - FILE_PATH_LITERAL("file:///"); - -void SanitizeGeneratedFileName(base::FilePath::StringType* filename, - bool replace_trailing) { - const base::FilePath::CharType kReplace[] = FILE_PATH_LITERAL("-"); - if (filename->empty()) - return; - if (replace_trailing) { - // Handle CreateFile() stripping trailing dots and spaces on filenames - // http://support.microsoft.com/kb/115827 - size_t length = filename->size(); - size_t pos = filename->find_last_not_of(FILE_PATH_LITERAL(" .")); - filename->resize((pos == std::string::npos) ? 0 : (pos + 1)); - base::TrimWhitespace(*filename, base::TRIM_TRAILING, filename); - if (filename->empty()) - return; - size_t trimmed = length - filename->size(); - if (trimmed) - filename->insert(filename->end(), trimmed, kReplace[0]); - } - base::TrimString(*filename, FILE_PATH_LITERAL("."), filename); - if (filename->empty()) - return; - // Replace any path information by changing path separators. - ReplaceSubstringsAfterOffset(filename, 0, FILE_PATH_LITERAL("/"), kReplace); - ReplaceSubstringsAfterOffset(filename, 0, FILE_PATH_LITERAL("\\"), kReplace); -} - -// Returns the filename determined from the last component of the path portion -// of the URL. Returns an empty string if the URL doesn't have a path or is -// invalid. If the generated filename is not reliable, -// |should_overwrite_extension| will be set to true, in which case a better -// extension should be determined based on the content type. -std::string GetFileNameFromURL(const GURL& url, - const std::string& referrer_charset, - bool* should_overwrite_extension) { - // about: and data: URLs don't have file names, but esp. data: URLs may - // contain parts that look like ones (i.e., contain a slash). Therefore we - // don't attempt to divine a file name out of them. - if (!url.is_valid() || url.SchemeIs("about") || url.SchemeIs("data")) - return std::string(); - - const std::string unescaped_url_filename = UnescapeURLComponent( - url.ExtractFileName(), - UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS); - - // The URL's path should be escaped UTF-8, but may not be. - std::string decoded_filename = unescaped_url_filename; - if (!IsStringUTF8(decoded_filename)) { - // TODO(jshin): this is probably not robust enough. To be sure, we need - // encoding detection. - base::string16 utf16_output; - if (!referrer_charset.empty() && - base::CodepageToUTF16(unescaped_url_filename, - referrer_charset.c_str(), - base::OnStringConversionError::FAIL, - &utf16_output)) { - decoded_filename = base::UTF16ToUTF8(utf16_output); - } else { - decoded_filename = base::WideToUTF8( - base::SysNativeMBToWide(unescaped_url_filename)); - } - } - // If the URL contains a (possibly empty) query, assume it is a generator, and - // allow the determined extension to be overwritten. - *should_overwrite_extension = !decoded_filename.empty() && url.has_query(); - - return decoded_filename; -} - -// Returns whether the specified extension is automatically integrated into the -// windows shell. -bool IsShellIntegratedExtension(const base::FilePath::StringType& extension) { - base::FilePath::StringType extension_lower = StringToLowerASCII(extension); - - // http://msdn.microsoft.com/en-us/library/ms811694.aspx - // Right-clicking on shortcuts can be magical. - if ((extension_lower == FILE_PATH_LITERAL("local")) || - (extension_lower == FILE_PATH_LITERAL("lnk"))) - return true; - - // http://www.juniper.net/security/auto/vulnerabilities/vuln2612.html - // Files become magical if they end in a CLSID, so block such extensions. - if (!extension_lower.empty() && - (extension_lower[0] == FILE_PATH_LITERAL('{')) && - (extension_lower[extension_lower.length() - 1] == FILE_PATH_LITERAL('}'))) - return true; - return false; -} - -// Returns whether the specified file name is a reserved name on windows. -// This includes names like "com2.zip" (which correspond to devices) and -// desktop.ini and thumbs.db which have special meaning to the windows shell. -bool IsReservedName(const base::FilePath::StringType& filename) { - // This list is taken from the MSDN article "Naming a file" - // http://msdn2.microsoft.com/en-us/library/aa365247(VS.85).aspx - // I also added clock$ because GetSaveFileName seems to consider it as a - // reserved name too. - static const char* const known_devices[] = { - "con", "prn", "aux", "nul", "com1", "com2", "com3", "com4", "com5", - "com6", "com7", "com8", "com9", "lpt1", "lpt2", "lpt3", "lpt4", - "lpt5", "lpt6", "lpt7", "lpt8", "lpt9", "clock$" - }; -#if defined(OS_WIN) - std::string filename_lower = StringToLowerASCII(base::WideToUTF8(filename)); -#elif defined(OS_POSIX) - std::string filename_lower = StringToLowerASCII(filename); -#endif - - for (size_t i = 0; i < arraysize(known_devices); ++i) { - // Exact match. - if (filename_lower == known_devices[i]) - return true; - // Starts with "DEVICE.". - if (filename_lower.find(std::string(known_devices[i]) + ".") == 0) - return true; - } - - static const char* const magic_names[] = { - // These file names are used by the "Customize folder" feature of the shell. - "desktop.ini", - "thumbs.db", - }; - - for (size_t i = 0; i < arraysize(magic_names); ++i) { - if (filename_lower == magic_names[i]) - return true; - } - - return false; -} - - -// Examines the current extension in |file_name| and modifies it if necessary in -// order to ensure the filename is safe. If |file_name| doesn't contain an -// extension or if |ignore_extension| is true, then a new extension will be -// constructed based on the |mime_type|. -// -// We're addressing two things here: -// -// 1) Usability. If there is no reliable file extension, we want to guess a -// reasonable file extension based on the content type. -// -// 2) Shell integration. Some file extensions automatically integrate with the -// shell. We block these extensions to prevent a malicious web site from -// integrating with the user's shell. -void EnsureSafeExtension(const std::string& mime_type, - bool ignore_extension, - base::FilePath* file_name) { - // See if our file name already contains an extension. - base::FilePath::StringType extension = file_name->Extension(); - if (!extension.empty()) - extension.erase(extension.begin()); // Erase preceding '.'. - - if ((ignore_extension || extension.empty()) && !mime_type.empty()) { - base::FilePath::StringType preferred_mime_extension; - std::vector<base::FilePath::StringType> all_mime_extensions; - // The GetPreferredExtensionForMimeType call will end up going to disk. Do - // this on another thread to avoid slowing the IO thread. - // http://crbug.com/61827 - // TODO(asanka): Remove this ScopedAllowIO once all callers have switched - // over to IO safe threads. - base::ThreadRestrictions::ScopedAllowIO allow_io; - net::GetPreferredExtensionForMimeType(mime_type, &preferred_mime_extension); - net::GetExtensionsForMimeType(mime_type, &all_mime_extensions); - // If the existing extension is in the list of valid extensions for the - // given type, use it. This avoids doing things like pointlessly renaming - // "foo.jpg" to "foo.jpeg". - if (std::find(all_mime_extensions.begin(), - all_mime_extensions.end(), - extension) != all_mime_extensions.end()) { - // leave |extension| alone - } else if (!preferred_mime_extension.empty()) { - extension = preferred_mime_extension; - } - } - -#if defined(OS_WIN) - static const base::FilePath::CharType default_extension[] = - FILE_PATH_LITERAL("download"); - - // Rename shell-integrated extensions. - // TODO(asanka): Consider stripping out the bad extension and replacing it - // with the preferred extension for the MIME type if one is available. - if (IsShellIntegratedExtension(extension)) - extension.assign(default_extension); -#endif - - *file_name = file_name->ReplaceExtension(extension); -} - -bool FilePathToString16(const base::FilePath& path, base::string16* converted) { -#if defined(OS_WIN) - return base::WideToUTF16( - path.value().c_str(), path.value().size(), converted); -#elif defined(OS_POSIX) - std::string component8 = path.AsUTF8Unsafe(); - return !component8.empty() && - base::UTF8ToUTF16(component8.c_str(), component8.size(), converted); -#endif -} - -} // namespace - -GURL FilePathToFileURL(const base::FilePath& path) { - // Produce a URL like "file:///C:/foo" for a regular file, or - // "file://///server/path" for UNC. The URL canonicalizer will fix up the - // latter case to be the canonical UNC form: "file://server/path" - base::FilePath::StringType url_string(kFileURLPrefix); - if (!path.IsAbsolute()) { - base::FilePath current_dir; - PathService::Get(base::DIR_CURRENT, ¤t_dir); - url_string.append(current_dir.value()); - url_string.push_back(base::FilePath::kSeparators[0]); - } - url_string.append(path.value()); - - // Now do replacement of some characters. Since we assume the input is a - // literal filename, anything the URL parser might consider special should - // be escaped here. - - // must be the first substitution since others will introduce percents as the - // escape character - ReplaceSubstringsAfterOffset(&url_string, 0, - FILE_PATH_LITERAL("%"), FILE_PATH_LITERAL("%25")); - - // semicolon is supposed to be some kind of separator according to RFC 2396 - ReplaceSubstringsAfterOffset(&url_string, 0, - FILE_PATH_LITERAL(";"), FILE_PATH_LITERAL("%3B")); - - ReplaceSubstringsAfterOffset(&url_string, 0, - FILE_PATH_LITERAL("#"), FILE_PATH_LITERAL("%23")); - - ReplaceSubstringsAfterOffset(&url_string, 0, - FILE_PATH_LITERAL("?"), FILE_PATH_LITERAL("%3F")); - -#if defined(OS_POSIX) - ReplaceSubstringsAfterOffset(&url_string, 0, - FILE_PATH_LITERAL("\\"), FILE_PATH_LITERAL("%5C")); -#endif - - return GURL(url_string); -} - -bool FileURLToFilePath(const GURL& url, base::FilePath* file_path) { - *file_path = base::FilePath(); - base::FilePath::StringType& file_path_str = - const_cast<base::FilePath::StringType&>(file_path->value()); - file_path_str.clear(); - - if (!url.is_valid()) - return false; - -#if defined(OS_WIN) - std::string path; - std::string host = url.host(); - if (host.empty()) { - // URL contains no host, the path is the filename. In this case, the path - // will probably be preceeded with a slash, as in "/C:/foo.txt", so we - // trim out that here. - path = url.path(); - size_t first_non_slash = path.find_first_not_of("/\\"); - if (first_non_slash != std::string::npos && first_non_slash > 0) - path.erase(0, first_non_slash); - } else { - // URL contains a host: this means it's UNC. We keep the preceeding slash - // on the path. - path = "\\\\"; - path.append(host); - path.append(url.path()); - } - std::replace(path.begin(), path.end(), '/', '\\'); -#else // defined(OS_WIN) - // Firefox seems to ignore the "host" of a file url if there is one. That is, - // file://foo/bar.txt maps to /bar.txt. - // TODO(dhg): This should probably take into account UNCs which could - // include a hostname other than localhost or blank - std::string path = url.path(); -#endif // !defined(OS_WIN) - - if (path.empty()) - return false; - - // GURL stores strings as percent-encoded 8-bit, this will undo if possible. - path = UnescapeURLComponent(path, - UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS); - -#if defined(OS_WIN) - if (IsStringUTF8(path)) { - file_path_str.assign(base::UTF8ToWide(path)); - // We used to try too hard and see if |path| made up entirely of - // the 1st 256 characters in the Unicode was a zero-extended UTF-16. - // If so, we converted it to 'Latin-1' and checked if the result was UTF-8. - // If the check passed, we converted the result to UTF-8. - // Otherwise, we treated the result as the native OS encoding. - // However, that led to http://crbug.com/4619 and http://crbug.com/14153 - } else { - // Not UTF-8, assume encoding is native codepage and we're done. We know we - // are giving the conversion function a nonempty string, and it may fail if - // the given string is not in the current encoding and give us an empty - // string back. We detect this and report failure. - file_path_str = base::SysNativeMBToWide(path); - } -#else // defined(OS_WIN) - // Collapse multiple path slashes into a single path slash. - std::string new_path; - do { - new_path = path; - ReplaceSubstringsAfterOffset(&new_path, 0, "//", "/"); - path.swap(new_path); - } while (new_path != path); - - file_path_str.assign(path); -#endif // !defined(OS_WIN) - - return !file_path_str.empty(); -} - -bool IsSafePortablePathComponent(const base::FilePath& component) { - base::string16 component16; - base::FilePath::StringType sanitized = component.value(); - SanitizeGeneratedFileName(&sanitized, true); - base::FilePath::StringType extension = component.Extension(); - if (!extension.empty()) - extension.erase(extension.begin()); // Erase preceding '.'. - return !component.empty() && - (component == component.BaseName()) && - (component == component.StripTrailingSeparators()) && - FilePathToString16(component, &component16) && - file_util::IsFilenameLegal(component16) && - !IsShellIntegratedExtension(extension) && - (sanitized == component.value()) && - !IsReservedName(component.value()); -} - -bool IsSafePortableRelativePath(const base::FilePath& path) { - if (path.empty() || path.IsAbsolute() || path.EndsWithSeparator()) - return false; - std::vector<base::FilePath::StringType> components; - path.GetComponents(&components); - if (components.empty()) - return false; - for (size_t i = 0; i < components.size() - 1; ++i) { - if (!IsSafePortablePathComponent(base::FilePath(components[i]))) - return false; - } - return IsSafePortablePathComponent(path.BaseName()); -} - -void GenerateSafeFileName(const std::string& mime_type, - bool ignore_extension, - base::FilePath* file_path) { - // Make sure we get the right file extension - EnsureSafeExtension(mime_type, ignore_extension, file_path); - -#if defined(OS_WIN) - // Prepend "_" to the file name if it's a reserved name - base::FilePath::StringType leaf_name = file_path->BaseName().value(); - DCHECK(!leaf_name.empty()); - if (IsReservedName(leaf_name)) { - leaf_name = base::FilePath::StringType(FILE_PATH_LITERAL("_")) + leaf_name; - *file_path = file_path->DirName(); - if (file_path->value() == base::FilePath::kCurrentDirectory) { - *file_path = base::FilePath(leaf_name); - } else { - *file_path = file_path->Append(leaf_name); - } - } -#endif -} - -base::string16 GetSuggestedFilename(const GURL& url, - const std::string& content_disposition, - const std::string& referrer_charset, - const std::string& suggested_name, - const std::string& mime_type, - const std::string& default_name) { - // TODO: this function to be updated to match the httpbis recommendations. - // Talk to abarth for the latest news. - - // We don't translate this fallback string, "download". If localization is - // needed, the caller should provide localized fallback in |default_name|. - static const base::FilePath::CharType kFinalFallbackName[] = - FILE_PATH_LITERAL("download"); - std::string filename; // In UTF-8 - bool overwrite_extension = false; - - // Try to extract a filename from content-disposition first. - if (!content_disposition.empty()) { - HttpContentDisposition header(content_disposition, referrer_charset); - filename = header.filename(); - } - - // Then try to use the suggested name. - if (filename.empty() && !suggested_name.empty()) - filename = suggested_name; - - // Now try extracting the filename from the URL. GetFileNameFromURL() only - // looks at the last component of the URL and doesn't return the hostname as a - // failover. - if (filename.empty()) - filename = GetFileNameFromURL(url, referrer_charset, &overwrite_extension); - - // Finally try the URL hostname, but only if there's no default specified in - // |default_name|. Some schemes (e.g.: file:, about:, data:) do not have a - // host name. - if (filename.empty() && - default_name.empty() && - url.is_valid() && - !url.host().empty()) { - // TODO(jungshik) : Decode a 'punycoded' IDN hostname. (bug 1264451) - filename = url.host(); - } - - bool replace_trailing = false; - base::FilePath::StringType result_str, default_name_str; -#if defined(OS_WIN) - replace_trailing = true; - result_str = base::UTF8ToUTF16(filename); - default_name_str = base::UTF8ToUTF16(default_name); -#else - result_str = filename; - default_name_str = default_name; -#endif - SanitizeGeneratedFileName(&result_str, replace_trailing); - if (result_str.find_last_not_of(FILE_PATH_LITERAL("-_")) == - base::FilePath::StringType::npos) { - result_str = !default_name_str.empty() ? default_name_str : - base::FilePath::StringType(kFinalFallbackName); - overwrite_extension = false; - } - file_util::ReplaceIllegalCharactersInPath(&result_str, '-'); - base::FilePath result(result_str); - GenerateSafeFileName(mime_type, overwrite_extension, &result); - - base::string16 result16; - if (!FilePathToString16(result, &result16)) { - result = base::FilePath(default_name_str); - if (!FilePathToString16(result, &result16)) { - result = base::FilePath(kFinalFallbackName); - FilePathToString16(result, &result16); - } - } - return result16; -} - -base::FilePath GenerateFileName(const GURL& url, - const std::string& content_disposition, - const std::string& referrer_charset, - const std::string& suggested_name, - const std::string& mime_type, - const std::string& default_file_name) { - base::string16 file_name = GetSuggestedFilename(url, - content_disposition, - referrer_charset, - suggested_name, - mime_type, - default_file_name); - -#if defined(OS_WIN) - base::FilePath generated_name(file_name); -#else - base::FilePath generated_name( - base::SysWideToNativeMB(base::UTF16ToWide(file_name))); -#endif - -#if defined(OS_CHROMEOS) - // When doing file manager operations on ChromeOS, the file paths get - // normalized in WebKit layer, so let's ensure downloaded files have - // normalized names. Otherwise, we won't be able to handle files with NFD - // utf8 encoded characters in name. - file_util::NormalizeFileNameEncoding(&generated_name); -#endif - - DCHECK(!generated_name.empty()); - - return generated_name; -} - -} // namespace net diff --git a/net/base/filename_util.h b/net/base/filename_util.h deleted file mode 100644 index 6b151ba..0000000 --- a/net/base/filename_util.h +++ /dev/null @@ -1,116 +0,0 @@ -// Copyright 2014 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef NET_BASE_FILENAME_UTIL_H_ -#define NET_BASE_FILENAME_UTIL_H_ - -#include <string> - -#include "base/strings/string16.h" -#include "net/base/net_export.h" - -class GURL; - -namespace base { -class FilePath; -} - -namespace net { - -// Given the full path to a file name, creates a file: URL. The returned URL -// may not be valid if the input is malformed. -NET_EXPORT GURL FilePathToFileURL(const base::FilePath& path); - -// Converts a file: URL back to a filename that can be passed to the OS. The -// file URL must be well-formed (GURL::is_valid() must return true); we don't -// handle degenerate cases here. Returns true on success, false if it isn't a -// valid file URL. On failure, *file_path will be empty. -NET_EXPORT bool FileURLToFilePath(const GURL& url, base::FilePath* file_path); - -// Generates a filename using the first successful method from the following (in -// order): -// -// 1) The raw Content-Disposition header in |content_disposition| as read from -// the network. |referrer_charset| is used to decode non-ASCII strings. -// 2) |suggested_name| if specified. |suggested_name| is assumed to be in -// UTF-8. -// 3) The filename extracted from the |url|. |referrer_charset| will be used to -// interpret the URL if there are non-ascii characters. -// 4) |default_name|. If non-empty, |default_name| is assumed to be a filename -// and shouldn't contain a path. |default_name| is not subject to validation -// or sanitization, and therefore shouldn't be a user supplied string. -// 5) The hostname portion from the |url| -// -// Then, leading and trailing '.'s will be removed. On Windows, trailing spaces -// are also removed. The string "download" is the final fallback if no filename -// is found or the filename is empty. -// -// Any illegal characters in the filename will be replaced by '-'. If the -// filename doesn't contain an extension, and a |mime_type| is specified, the -// preferred extension for the |mime_type| will be appended to the filename. -// The resulting filename is then checked against a list of reserved names on -// Windows. If the name is reserved, an underscore will be prepended to the -// filename. -// -// Note: |mime_type| should only be specified if this function is called from a -// thread that allows IO. -NET_EXPORT base::string16 GetSuggestedFilename( - const GURL& url, - const std::string& content_disposition, - const std::string& referrer_charset, - const std::string& suggested_name, - const std::string& mime_type, - const std::string& default_name); - -// Similar to GetSuggestedFilename(), but returns a FilePath. -NET_EXPORT base::FilePath GenerateFileName( - const GURL& url, - const std::string& content_disposition, - const std::string& referrer_charset, - const std::string& suggested_name, - const std::string& mime_type, - const std::string& default_name); - -// Valid components: -// * are not empty -// * are not Windows reserved names (CON, NUL.zip, etc.) -// * do not have trailing separators -// * do not equal kCurrentDirectory -// * do not reference the parent directory -// * do not contain illegal characters -// * do not end with Windows shell-integrated extensions (even on posix) -// * do not begin with '.' (which would hide them in most file managers) -// * do not end with ' ' or '.' -NET_EXPORT bool IsSafePortablePathComponent(const base::FilePath& component); - -// Basenames of valid relative paths are IsSafePortableBasename(), and internal -// path components of valid relative paths are valid path components as -// described above IsSafePortableBasename(). Valid relative paths are not -// absolute paths. -NET_EXPORT bool IsSafePortableRelativePath(const base::FilePath& path); - -// Ensures that the filename and extension is safe to use in the filesystem. -// -// Assumes that |file_path| already contains a valid path or file name. On -// Windows if the extension causes the file to have an unsafe interaction with -// the shell (see net_util::IsShellIntegratedExtension()), then it will be -// replaced by the string 'download'. If |file_path| doesn't contain an -// extension or |ignore_extension| is true then the preferred extension, if one -// exists, for |mime_type| will be used as the extension. -// -// On Windows, the filename will be checked against a set of reserved names, and -// if so, an underscore will be prepended to the name. -// -// |file_name| can either be just the file name or it can be a full path to a -// file. -// -// Note: |mime_type| should only be non-empty if this function is called from a -// thread that allows IO. -NET_EXPORT void GenerateSafeFileName(const std::string& mime_type, - bool ignore_extension, - base::FilePath* file_path); - -} // namespace net - -#endif // NET_BASE_FILENAME_UTIL_H_ diff --git a/net/base/filename_util_unittest.cc b/net/base/filename_util_unittest.cc deleted file mode 100644 index 701e772..0000000 --- a/net/base/filename_util_unittest.cc +++ /dev/null @@ -1,1652 +0,0 @@ -// Copyright 2014 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "net/base/filename_util.h" - -#include "base/file_util.h" -#include "base/files/file_path.h" -#include "base/strings/string_util.h" -#include "base/strings/utf_string_conversions.h" -#include "base/test/test_file_util.h" -#include "testing/gtest/include/gtest/gtest.h" -#include "url/gurl.h" - -namespace net { - -namespace { - -struct FileCase { - const wchar_t* file; - const char* url; -}; - -struct GenerateFilenameCase { - int lineno; - const char* url; - const char* content_disp_header; - const char* referrer_charset; - const char* suggested_filename; - const char* mime_type; - const wchar_t* default_filename; - const wchar_t* expected_filename; -}; - -void RunGenerateFileNameTestCase(const GenerateFilenameCase* test_case) { - std::string default_filename(base::WideToUTF8(test_case->default_filename)); - base::FilePath file_path = GenerateFileName( - GURL(test_case->url), test_case->content_disp_header, - test_case->referrer_charset, test_case->suggested_filename, - test_case->mime_type, default_filename); - EXPECT_EQ(test_case->expected_filename, - file_util::FilePathAsWString(file_path)) - << "test case at line number: " << test_case->lineno; -} - -} // namespace - -static const base::FilePath::CharType* kSafePortableBasenames[] = { - FILE_PATH_LITERAL("a"), - FILE_PATH_LITERAL("a.txt"), - FILE_PATH_LITERAL("a b.txt"), - FILE_PATH_LITERAL("a-b.txt"), - FILE_PATH_LITERAL("My Computer"), - FILE_PATH_LITERAL(" Computer"), -}; - -static const base::FilePath::CharType* kUnsafePortableBasenames[] = { - FILE_PATH_LITERAL(""), - FILE_PATH_LITERAL("."), - FILE_PATH_LITERAL(".."), - FILE_PATH_LITERAL("..."), - FILE_PATH_LITERAL("con"), - FILE_PATH_LITERAL("con.zip"), - FILE_PATH_LITERAL("NUL"), - FILE_PATH_LITERAL("NUL.zip"), - FILE_PATH_LITERAL(".a"), - FILE_PATH_LITERAL("a."), - FILE_PATH_LITERAL("a\"a"), - FILE_PATH_LITERAL("a<a"), - FILE_PATH_LITERAL("a>a"), - FILE_PATH_LITERAL("a?a"), - FILE_PATH_LITERAL("a/"), - FILE_PATH_LITERAL("a\\"), - FILE_PATH_LITERAL("a "), - FILE_PATH_LITERAL("a . ."), - FILE_PATH_LITERAL("My Computer.{a}"), - FILE_PATH_LITERAL("My Computer.{20D04FE0-3AEA-1069-A2D8-08002B30309D}"), -#if !defined(OS_WIN) - FILE_PATH_LITERAL("a\\a"), -#endif -}; - -static const base::FilePath::CharType* kSafePortableRelativePaths[] = { - FILE_PATH_LITERAL("a/a"), -#if defined(OS_WIN) - FILE_PATH_LITERAL("a\\a"), -#endif -}; - -TEST(FilenameUtilTest, IsSafePortablePathComponent) { - for (size_t i = 0 ; i < arraysize(kSafePortableBasenames); ++i) { - EXPECT_TRUE(IsSafePortablePathComponent(base::FilePath( - kSafePortableBasenames[i]))) << kSafePortableBasenames[i]; - } - for (size_t i = 0 ; i < arraysize(kUnsafePortableBasenames); ++i) { - EXPECT_FALSE(IsSafePortablePathComponent(base::FilePath( - kUnsafePortableBasenames[i]))) << kUnsafePortableBasenames[i]; - } - for (size_t i = 0 ; i < arraysize(kSafePortableRelativePaths); ++i) { - EXPECT_FALSE(IsSafePortablePathComponent(base::FilePath( - kSafePortableRelativePaths[i]))) << kSafePortableRelativePaths[i]; - } -} - -TEST(FilenameUtilTest, IsSafePortableRelativePath) { - base::FilePath safe_dirname(FILE_PATH_LITERAL("a")); - for (size_t i = 0 ; i < arraysize(kSafePortableBasenames); ++i) { - EXPECT_TRUE(IsSafePortableRelativePath(base::FilePath( - kSafePortableBasenames[i]))) << kSafePortableBasenames[i]; - EXPECT_TRUE(IsSafePortableRelativePath(safe_dirname.Append(base::FilePath( - kSafePortableBasenames[i])))) << kSafePortableBasenames[i]; - } - for (size_t i = 0 ; i < arraysize(kSafePortableRelativePaths); ++i) { - EXPECT_TRUE(IsSafePortableRelativePath(base::FilePath( - kSafePortableRelativePaths[i]))) << kSafePortableRelativePaths[i]; - EXPECT_TRUE(IsSafePortableRelativePath(safe_dirname.Append(base::FilePath( - kSafePortableRelativePaths[i])))) << kSafePortableRelativePaths[i]; - } - for (size_t i = 0 ; i < arraysize(kUnsafePortableBasenames); ++i) { - EXPECT_FALSE(IsSafePortableRelativePath(base::FilePath( - kUnsafePortableBasenames[i]))) << kUnsafePortableBasenames[i]; - if (!base::FilePath::StringType(kUnsafePortableBasenames[i]).empty()) { - EXPECT_FALSE(IsSafePortableRelativePath(safe_dirname.Append( - base::FilePath(kUnsafePortableBasenames[i])))) - << kUnsafePortableBasenames[i]; - } - } -} - -TEST(FilenameUtilTest, FileURLConversion) { - // a list of test file names and the corresponding URLs - const FileCase round_trip_cases[] = { -#if defined(OS_WIN) - {L"C:\\foo\\bar.txt", "file:///C:/foo/bar.txt"}, - {L"\\\\some computer\\foo\\bar.txt", - "file://some%20computer/foo/bar.txt"}, // UNC - {L"D:\\Name;with%some symbols*#", - "file:///D:/Name%3Bwith%25some%20symbols*%23"}, - // issue 14153: To be tested with the OS default codepage other than 1252. - {L"D:\\latin1\\caf\x00E9\x00DD.txt", - "file:///D:/latin1/caf%C3%A9%C3%9D.txt"}, - {L"D:\\otherlatin\\caf\x0119.txt", - "file:///D:/otherlatin/caf%C4%99.txt"}, - {L"D:\\greek\\\x03B1\x03B2\x03B3.txt", - "file:///D:/greek/%CE%B1%CE%B2%CE%B3.txt"}, - {L"D:\\Chinese\\\x6240\x6709\x4e2d\x6587\x7f51\x9875.doc", - "file:///D:/Chinese/%E6%89%80%E6%9C%89%E4%B8%AD%E6%96%87%E7%BD%91" - "%E9%A1%B5.doc"}, - {L"D:\\plane1\\\xD835\xDC00\xD835\xDC01.txt", // Math alphabet "AB" - "file:///D:/plane1/%F0%9D%90%80%F0%9D%90%81.txt"}, -#elif defined(OS_POSIX) - {L"/foo/bar.txt", "file:///foo/bar.txt"}, - {L"/foo/BAR.txt", "file:///foo/BAR.txt"}, - {L"/C:/foo/bar.txt", "file:///C:/foo/bar.txt"}, - {L"/foo/bar?.txt", "file:///foo/bar%3F.txt"}, - {L"/some computer/foo/bar.txt", "file:///some%20computer/foo/bar.txt"}, - {L"/Name;with%some symbols*#", "file:///Name%3Bwith%25some%20symbols*%23"}, - {L"/latin1/caf\x00E9\x00DD.txt", "file:///latin1/caf%C3%A9%C3%9D.txt"}, - {L"/otherlatin/caf\x0119.txt", "file:///otherlatin/caf%C4%99.txt"}, - {L"/greek/\x03B1\x03B2\x03B3.txt", "file:///greek/%CE%B1%CE%B2%CE%B3.txt"}, - {L"/Chinese/\x6240\x6709\x4e2d\x6587\x7f51\x9875.doc", - "file:///Chinese/%E6%89%80%E6%9C%89%E4%B8%AD%E6%96%87%E7%BD" - "%91%E9%A1%B5.doc"}, - {L"/plane1/\x1D400\x1D401.txt", // Math alphabet "AB" - "file:///plane1/%F0%9D%90%80%F0%9D%90%81.txt"}, -#endif - }; - - // First, we'll test that we can round-trip all of the above cases of URLs - base::FilePath output; - for (size_t i = 0; i < ARRAYSIZE_UNSAFE(round_trip_cases); i++) { - // convert to the file URL - GURL file_url(FilePathToFileURL( - file_util::WStringAsFilePath(round_trip_cases[i].file))); - EXPECT_EQ(round_trip_cases[i].url, file_url.spec()); - - // Back to the filename. - EXPECT_TRUE(FileURLToFilePath(file_url, &output)); - EXPECT_EQ(round_trip_cases[i].file, file_util::FilePathAsWString(output)); - } - - // Test that various file: URLs get decoded into the correct file type - FileCase url_cases[] = { -#if defined(OS_WIN) - {L"C:\\foo\\bar.txt", "file:c|/foo\\bar.txt"}, - {L"C:\\foo\\bar.txt", "file:/c:/foo/bar.txt"}, - {L"\\\\foo\\bar.txt", "file://foo\\bar.txt"}, - {L"C:\\foo\\bar.txt", "file:///c:/foo/bar.txt"}, - {L"\\\\foo\\bar.txt", "file:////foo\\bar.txt"}, - {L"\\\\foo\\bar.txt", "file:/foo/bar.txt"}, - {L"\\\\foo\\bar.txt", "file://foo\\bar.txt"}, - {L"C:\\foo\\bar.txt", "file:\\\\\\c:/foo/bar.txt"}, -#elif defined(OS_POSIX) - {L"/c:/foo/bar.txt", "file:/c:/foo/bar.txt"}, - {L"/c:/foo/bar.txt", "file:///c:/foo/bar.txt"}, - {L"/foo/bar.txt", "file:/foo/bar.txt"}, - {L"/c:/foo/bar.txt", "file:\\\\\\c:/foo/bar.txt"}, - {L"/foo/bar.txt", "file:foo/bar.txt"}, - {L"/bar.txt", "file://foo/bar.txt"}, - {L"/foo/bar.txt", "file:///foo/bar.txt"}, - {L"/foo/bar.txt", "file:////foo/bar.txt"}, - {L"/foo/bar.txt", "file:////foo//bar.txt"}, - {L"/foo/bar.txt", "file:////foo///bar.txt"}, - {L"/foo/bar.txt", "file:////foo////bar.txt"}, - {L"/c:/foo/bar.txt", "file:\\\\\\c:/foo/bar.txt"}, - {L"/c:/foo/bar.txt", "file:c:/foo/bar.txt"}, - // We get these wrong because GURL turns back slashes into forward - // slashes. - //{L"/foo%5Cbar.txt", "file://foo\\bar.txt"}, - //{L"/c|/foo%5Cbar.txt", "file:c|/foo\\bar.txt"}, - //{L"/foo%5Cbar.txt", "file://foo\\bar.txt"}, - //{L"/foo%5Cbar.txt", "file:////foo\\bar.txt"}, - //{L"/foo%5Cbar.txt", "file://foo\\bar.txt"}, -#endif - }; - for (size_t i = 0; i < ARRAYSIZE_UNSAFE(url_cases); i++) { - FileURLToFilePath(GURL(url_cases[i].url), &output); - EXPECT_EQ(url_cases[i].file, file_util::FilePathAsWString(output)); - } - - // Unfortunately, UTF8ToWide discards invalid UTF8 input. -#ifdef BUG_878908_IS_FIXED - // Test that no conversion happens if the UTF-8 input is invalid, and that - // the input is preserved in UTF-8 - const char invalid_utf8[] = "file:///d:/Blah/\xff.doc"; - const wchar_t invalid_wide[] = L"D:\\Blah\\\xff.doc"; - EXPECT_TRUE(FileURLToFilePath( - GURL(std::string(invalid_utf8)), &output)); - EXPECT_EQ(std::wstring(invalid_wide), output); -#endif - - // Test that if a file URL is malformed, we get a failure - EXPECT_FALSE(FileURLToFilePath(GURL("filefoobar"), &output)); -} - -#if defined(OS_WIN) -#define JPEG_EXT L".jpg" -#define HTML_EXT L".htm" -#elif defined(OS_MACOSX) -#define JPEG_EXT L".jpeg" -#define HTML_EXT L".html" -#else -#define JPEG_EXT L".jpg" -#define HTML_EXT L".html" -#endif -#define TXT_EXT L".txt" -#define TAR_EXT L".tar" - -TEST(FilenameUtilTest, GenerateSafeFileName) { - const struct { - const char* mime_type; - const base::FilePath::CharType* filename; - const base::FilePath::CharType* expected_filename; - } safe_tests[] = { -#if defined(OS_WIN) - { - "text/html", - FILE_PATH_LITERAL("C:\\foo\\bar.htm"), - FILE_PATH_LITERAL("C:\\foo\\bar.htm") - }, - { - "text/html", - FILE_PATH_LITERAL("C:\\foo\\bar.html"), - FILE_PATH_LITERAL("C:\\foo\\bar.html") - }, - { - "text/html", - FILE_PATH_LITERAL("C:\\foo\\bar"), - FILE_PATH_LITERAL("C:\\foo\\bar.htm") - }, - { - "image/png", - FILE_PATH_LITERAL("C:\\bar.html"), - FILE_PATH_LITERAL("C:\\bar.html") - }, - { - "image/png", - FILE_PATH_LITERAL("C:\\bar"), - FILE_PATH_LITERAL("C:\\bar.png") - }, - { - "text/html", - FILE_PATH_LITERAL("C:\\foo\\bar.exe"), - FILE_PATH_LITERAL("C:\\foo\\bar.exe") - }, - { - "image/gif", - FILE_PATH_LITERAL("C:\\foo\\bar.exe"), - FILE_PATH_LITERAL("C:\\foo\\bar.exe") - }, - { - "text/html", - FILE_PATH_LITERAL("C:\\foo\\google.com"), - FILE_PATH_LITERAL("C:\\foo\\google.com") - }, - { - "text/html", - FILE_PATH_LITERAL("C:\\foo\\con.htm"), - FILE_PATH_LITERAL("C:\\foo\\_con.htm") - }, - { - "text/html", - FILE_PATH_LITERAL("C:\\foo\\con"), - FILE_PATH_LITERAL("C:\\foo\\_con.htm") - }, - { - "text/html", - FILE_PATH_LITERAL("C:\\foo\\harmless.{not-really-this-may-be-a-guid}"), - FILE_PATH_LITERAL("C:\\foo\\harmless.download") - }, - { - "text/html", - FILE_PATH_LITERAL("C:\\foo\\harmless.local"), - FILE_PATH_LITERAL("C:\\foo\\harmless.download") - }, - { - "text/html", - FILE_PATH_LITERAL("C:\\foo\\harmless.lnk"), - FILE_PATH_LITERAL("C:\\foo\\harmless.download") - }, - { - "text/html", - FILE_PATH_LITERAL("C:\\foo\\harmless.{mismatched-"), - FILE_PATH_LITERAL("C:\\foo\\harmless.{mismatched-") - }, - // Allow extension synonyms. - { - "image/jpeg", - FILE_PATH_LITERAL("C:\\foo\\bar.jpg"), - FILE_PATH_LITERAL("C:\\foo\\bar.jpg") - }, - { - "image/jpeg", - FILE_PATH_LITERAL("C:\\foo\\bar.jpeg"), - FILE_PATH_LITERAL("C:\\foo\\bar.jpeg") - }, -#else // !defined(OS_WIN) - { - "text/html", - FILE_PATH_LITERAL("/foo/bar.htm"), - FILE_PATH_LITERAL("/foo/bar.htm") - }, - { - "text/html", - FILE_PATH_LITERAL("/foo/bar.html"), - FILE_PATH_LITERAL("/foo/bar.html") - }, - { - "text/html", - FILE_PATH_LITERAL("/foo/bar"), - FILE_PATH_LITERAL("/foo/bar.html") - }, - { - "image/png", - FILE_PATH_LITERAL("/bar.html"), - FILE_PATH_LITERAL("/bar.html") - }, - { - "image/png", - FILE_PATH_LITERAL("/bar"), - FILE_PATH_LITERAL("/bar.png") - }, - { - "image/gif", - FILE_PATH_LITERAL("/foo/bar.exe"), - FILE_PATH_LITERAL("/foo/bar.exe") - }, - { - "text/html", - FILE_PATH_LITERAL("/foo/google.com"), - FILE_PATH_LITERAL("/foo/google.com") - }, - { - "text/html", - FILE_PATH_LITERAL("/foo/con.htm"), - FILE_PATH_LITERAL("/foo/con.htm") - }, - { - "text/html", - FILE_PATH_LITERAL("/foo/con"), - FILE_PATH_LITERAL("/foo/con.html") - }, - // Allow extension synonyms. - { - "image/jpeg", - FILE_PATH_LITERAL("/bar.jpg"), - FILE_PATH_LITERAL("/bar.jpg") - }, - { - "image/jpeg", - FILE_PATH_LITERAL("/bar.jpeg"), - FILE_PATH_LITERAL("/bar.jpeg") - }, -#endif // !defined(OS_WIN) - }; - - for (size_t i = 0; i < ARRAYSIZE_UNSAFE(safe_tests); ++i) { - base::FilePath file_path(safe_tests[i].filename); - GenerateSafeFileName(safe_tests[i].mime_type, false, &file_path); - EXPECT_EQ(safe_tests[i].expected_filename, file_path.value()) - << "Iteration " << i; - } -} - -TEST(FilenameUtilTest, GenerateFileName) { -#if defined(OS_POSIX) && !defined(OS_MACOSX) && !defined(OS_ANDROID) - // This test doesn't run when the locale is not UTF-8 because some of the - // string conversions fail. This is OK (we have the default value) but they - // don't match our expectations. - std::string locale = setlocale(LC_CTYPE, NULL); - StringToLowerASCII(&locale); - EXPECT_TRUE(locale.find("utf-8") != std::string::npos || - locale.find("utf8") != std::string::npos) - << "Your locale (" << locale << ") must be set to UTF-8 " - << "for this test to pass!"; -#endif - - // Tests whether the correct filename is selected from the the given - // parameters and that Content-Disposition headers are properly - // handled including failovers when the header is malformed. - const GenerateFilenameCase selection_tests[] = { - { - __LINE__, - "http://www.google.com/", - "attachment; filename=test.html", - "", - "", - "", - L"", - L"test.html" - }, - { - __LINE__, - "http://www.google.com/", - "attachment; filename=\"test.html\"", - "", - "", - "", - L"", - L"test.html" - }, - { - __LINE__, - "http://www.google.com/", - "attachment; filename= \"test.html\"", - "", - "", - "", - L"", - L"test.html" - }, - { - __LINE__, - "http://www.google.com/", - "attachment; filename = \"test.html\"", - "", - "", - "", - L"", - L"test.html" - }, - { // filename is whitespace. Should failover to URL host - __LINE__, - "http://www.google.com/", - "attachment; filename= ", - "", - "", - "", - L"", - L"www.google.com" - }, - { // No filename. - __LINE__, - "http://www.google.com/path/test.html", - "attachment", - "", - "", - "", - L"", - L"test.html" - }, - { // Ditto - __LINE__, - "http://www.google.com/path/test.html", - "attachment;", - "", - "", - "", - L"", - L"test.html" - }, - { // No C-D - __LINE__, - "http://www.google.com/", - "", - "", - "", - "", - L"", - L"www.google.com" - }, - { - __LINE__, - "http://www.google.com/test.html", - "", - "", - "", - "", - L"", - L"test.html" - }, - { // Now that we use src/url's ExtractFileName, this case falls back to - // the hostname. If this behavior is not desirable, we'd better change - // ExtractFileName (in url_parse). - __LINE__, - "http://www.google.com/path/", - "", - "", - "", - "", - L"", - L"www.google.com" - }, - { - __LINE__, - "http://www.google.com/path", - "", - "", - "", - "", - L"", - L"path" - }, - { - __LINE__, - "file:///", - "", - "", - "", - "", - L"", - L"download" - }, - { - __LINE__, - "file:///path/testfile", - "", - "", - "", - "", - L"", - L"testfile" - }, - { - __LINE__, - "non-standard-scheme:", - "", - "", - "", - "", - L"", - L"download" - }, - { // C-D should override default - __LINE__, - "http://www.google.com/", - "attachment; filename =\"test.html\"", - "", - "", - "", - L"download", - L"test.html" - }, - { // But the URL shouldn't - __LINE__, - "http://www.google.com/", - "", - "", - "", - "", - L"download", - L"download" - }, - { - __LINE__, - "http://www.google.com/", - "attachment; filename=\"../test.html\"", - "", - "", - "", - L"", - L"-test.html" - }, - { - __LINE__, - "http://www.google.com/", - "attachment; filename=\"..\\test.html\"", - "", - "", - "", - L"", - L"test.html" - }, - { - __LINE__, - "http://www.google.com/", - "attachment; filename=\"..\\\\test.html\"", - "", - "", - "", - L"", - L"-test.html" - }, - { // Filename disappears after leading and trailing periods are removed. - __LINE__, - "http://www.google.com/", - "attachment; filename=\"..\"", - "", - "", - "", - L"default", - L"default" - }, - { // C-D specified filename disappears. Failover to final filename. - __LINE__, - "http://www.google.com/test.html", - "attachment; filename=\"..\"", - "", - "", - "", - L"default", - L"default" - }, - // Below is a small subset of cases taken from HttpContentDisposition tests. - { - __LINE__, - "http://www.google.com/", - "attachment; filename=\"%EC%98%88%EC%88%A0%20" - "%EC%98%88%EC%88%A0.jpg\"", - "", - "", - "", - L"", - L"\uc608\uc220 \uc608\uc220.jpg" - }, - { - __LINE__, - "http://www.google.com/%EC%98%88%EC%88%A0%20%EC%98%88%EC%88%A0.jpg", - "", - "", - "", - "", - L"download", - L"\uc608\uc220 \uc608\uc220.jpg" - }, - { - __LINE__, - "http://www.google.com/", - "attachment;", - "", - "", - "", - L"\uB2E4\uC6B4\uB85C\uB4DC", - L"\uB2E4\uC6B4\uB85C\uB4DC" - }, - { - __LINE__, - "http://www.google.com/", - "attachment; filename=\"=?EUC-JP?Q?=B7=DD=BD=" - "D13=2Epng?=\"", - "", - "", - "", - L"download", - L"\u82b8\u88533.png" - }, - { - __LINE__, - "http://www.example.com/images?id=3", - "attachment; filename=caf\xc3\xa9.png", - "iso-8859-1", - "", - "", - L"", - L"caf\u00e9.png" - }, - { - __LINE__, - "http://www.example.com/images?id=3", - "attachment; filename=caf\xe5.png", - "windows-1253", - "", - "", - L"", - L"caf\u03b5.png" - }, - { - __LINE__, - "http://www.example.com/file?id=3", - "attachment; name=\xcf\xc2\xd4\xd8.zip", - "GBK", - "", - "", - L"", - L"\u4e0b\u8f7d.zip" - }, - { // Invalid C-D header. Extracts filename from url. - __LINE__, - "http://www.google.com/test.html", - "attachment; filename==?iiso88591?Q?caf=EG?=", - "", - "", - "", - L"", - L"test.html" - }, - // about: and data: URLs - { - __LINE__, - "about:chrome", - "", - "", - "", - "", - L"", - L"download" - }, - { - __LINE__, - "data:,looks/like/a.path", - "", - "", - "", - "", - L"", - L"download" - }, - { - __LINE__, - "data:text/plain;base64,VG8gYmUgb3Igbm90IHRvIGJlLg=", - "", - "", - "", - "", - L"", - L"download" - }, - { - __LINE__, - "data:,looks/like/a.path", - "", - "", - "", - "", - L"default_filename_is_given", - L"default_filename_is_given" - }, - { - __LINE__, - "data:,looks/like/a.path", - "", - "", - "", - "", - L"\u65e5\u672c\u8a9e", // Japanese Kanji. - L"\u65e5\u672c\u8a9e" - }, - { // The filename encoding is specified by the referrer charset. - __LINE__, - "http://example.com/V%FDvojov%E1%20psychologie.doc", - "", - "iso-8859-1", - "", - "", - L"", - L"V\u00fdvojov\u00e1 psychologie.doc" - }, - { // Suggested filename takes precedence over URL - __LINE__, - "http://www.google.com/test", - "", - "", - "suggested", - "", - L"", - L"suggested" - }, - { // The content-disposition has higher precedence over the suggested name. - __LINE__, - "http://www.google.com/test", - "attachment; filename=test.html", - "", - "suggested", - "", - L"", - L"test.html" - }, -#if 0 - { // The filename encoding doesn't match the referrer charset, the system - // charset, or UTF-8. - // TODO(jshin): we need to handle this case. - __LINE__, - "http://example.com/V%FDvojov%E1%20psychologie.doc", - "", - "utf-8", - "", - "", - L"", - L"V\u00fdvojov\u00e1 psychologie.doc", - }, -#endif - // Raw 8bit characters in C-D - { - __LINE__, - "http://www.example.com/images?id=3", - "attachment; filename=caf\xc3\xa9.png", - "iso-8859-1", - "", - "image/png", - L"", - L"caf\u00e9.png" - }, - { - __LINE__, - "http://www.example.com/images?id=3", - "attachment; filename=caf\xe5.png", - "windows-1253", - "", - "image/png", - L"", - L"caf\u03b5.png" - }, - { // No 'filename' keyword in the disposition, use the URL - __LINE__, - "http://www.evil.com/my_download.txt", - "a_file_name.txt", - "", - "", - "text/plain", - L"download", - L"my_download.txt" - }, - { // Spaces in the disposition file name - __LINE__, - "http://www.frontpagehacker.com/a_download.exe", - "filename=My Downloaded File.exe", - "", - "", - "application/octet-stream", - L"download", - L"My Downloaded File.exe" - }, - { // % encoded - __LINE__, - "http://www.examples.com/", - "attachment; " - "filename=\"%EC%98%88%EC%88%A0%20%EC%98%88%EC%88%A0.jpg\"", - "", - "", - "image/jpeg", - L"download", - L"\uc608\uc220 \uc608\uc220.jpg" - }, - { // name= parameter - __LINE__, - "http://www.examples.com/q.cgi?id=abc", - "attachment; name=abc de.pdf", - "", - "", - "application/octet-stream", - L"download", - L"abc de.pdf" - }, - { - __LINE__, - "http://www.example.com/path", - "filename=\"=?EUC-JP?Q?=B7=DD=BD=D13=2Epng?=\"", - "", - "", - "image/png", - L"download", - L"\x82b8\x8853" L"3.png" - }, - { // The following two have invalid CD headers and filenames come from the - // URL. - __LINE__, - "http://www.example.com/test%20123", - "attachment; filename==?iiso88591?Q?caf=EG?=", - "", - "", - "image/jpeg", - L"download", - L"test 123" JPEG_EXT - }, - { - __LINE__, - "http://www.google.com/%EC%98%88%EC%88%A0%20%EC%98%88%EC%88%A0.jpg", - "malformed_disposition", - "", - "", - "image/jpeg", - L"download", - L"\uc608\uc220 \uc608\uc220.jpg" - }, - { // Invalid C-D. No filename from URL. Falls back to 'download'. - __LINE__, - "http://www.google.com/path1/path2/", - "attachment; filename==?iso88591?Q?caf=E3?", - "", - "", - "image/jpeg", - L"download", - L"download" JPEG_EXT - }, - }; - - // Tests filename generation. Once the correct filename is - // selected, they should be passed through the validation steps and - // a correct extension should be added if necessary. - const GenerateFilenameCase generation_tests[] = { - // Dotfiles. Ensures preceeding period(s) stripped. - { - __LINE__, - "http://www.google.com/.test.html", - "", - "", - "", - "", - L"", - L"test.html" - }, - { - __LINE__, - "http://www.google.com/.test", - "", - "", - "", - "", - L"", - L"test" - }, - { - __LINE__, - "http://www.google.com/..test", - "", - "", - "", - "", - L"", - L"test" - }, - { // Disposition has relative paths, remove directory separators - __LINE__, - "http://www.evil.com/my_download.txt", - "filename=../../../../././../a_file_name.txt", - "", - "", - "text/plain", - L"download", - L"-..-..-..-.-.-..-a_file_name.txt" - }, - { // Disposition has parent directories, remove directory separators - __LINE__, - "http://www.evil.com/my_download.txt", - "filename=dir1/dir2/a_file_name.txt", - "", - "", - "text/plain", - L"download", - L"dir1-dir2-a_file_name.txt" - }, - { // Disposition has relative paths, remove directory separators - __LINE__, - "http://www.evil.com/my_download.txt", - "filename=..\\..\\..\\..\\.\\.\\..\\a_file_name.txt", - "", - "", - "text/plain", - L"download", - L"-..-..-..-.-.-..-a_file_name.txt" - }, - { // Disposition has parent directories, remove directory separators - __LINE__, - "http://www.evil.com/my_download.txt", - "filename=dir1\\dir2\\a_file_name.txt", - "", - "", - "text/plain", - L"download", - L"dir1-dir2-a_file_name.txt" - }, - { // No useful information in disposition or URL, use default - __LINE__, - "http://www.truncated.com/path/", - "", - "", - "", - "text/plain", - L"download", - L"download" TXT_EXT - }, - { // Filename looks like HTML? - __LINE__, - "http://www.evil.com/get/malware/here", - "filename=\"<blink>Hello kitty</blink>\"", - "", - "", - "text/plain", - L"default", - L"-blink-Hello kitty--blink-" TXT_EXT - }, - { // A normal avi should get .avi and not .avi.avi - __LINE__, - "https://blah.google.com/misc/2.avi", - "", - "", - "", - "video/x-msvideo", - L"download", - L"2.avi" - }, - { // Extension generation - __LINE__, - "http://www.example.com/my-cat", - "filename=my-cat", - "", - "", - "image/jpeg", - L"download", - L"my-cat" JPEG_EXT - }, - { - __LINE__, - "http://www.example.com/my-cat", - "filename=my-cat", - "", - "", - "text/plain", - L"download", - L"my-cat.txt" - }, - { - __LINE__, - "http://www.example.com/my-cat", - "filename=my-cat", - "", - "", - "text/html", - L"download", - L"my-cat" HTML_EXT - }, - { // Unknown MIME type - __LINE__, - "http://www.example.com/my-cat", - "filename=my-cat", - "", - "", - "dance/party", - L"download", - L"my-cat" - }, - { - __LINE__, - "http://www.example.com/my-cat.jpg", - "filename=my-cat.jpg", - "", - "", - "text/plain", - L"download", - L"my-cat.jpg" - }, - // Windows specific tests -#if defined(OS_WIN) - { - __LINE__, - "http://www.goodguy.com/evil.exe", - "filename=evil.exe", - "", - "", - "image/jpeg", - L"download", - L"evil.exe" - }, - { - __LINE__, - "http://www.goodguy.com/ok.exe", - "filename=ok.exe", - "", - "", - "binary/octet-stream", - L"download", - L"ok.exe" - }, - { - __LINE__, - "http://www.goodguy.com/evil.dll", - "filename=evil.dll", - "", - "", - "dance/party", - L"download", - L"evil.dll" - }, - { - __LINE__, - "http://www.goodguy.com/evil.exe", - "filename=evil", - "", - "", - "application/rss+xml", - L"download", - L"evil" - }, - // Test truncation of trailing dots and spaces - { - __LINE__, - "http://www.goodguy.com/evil.exe ", - "filename=evil.exe ", - "", - "", - "binary/octet-stream", - L"download", - L"evil.exe" - }, - { - __LINE__, - "http://www.goodguy.com/evil.exe.", - "filename=evil.exe.", - "", - "", - "binary/octet-stream", - L"download", - L"evil.exe-" - }, - { - __LINE__, - "http://www.goodguy.com/evil.exe. . .", - "filename=evil.exe. . .", - "", - "", - "binary/octet-stream", - L"download", - L"evil.exe-------" - }, - { - __LINE__, - "http://www.goodguy.com/evil.", - "filename=evil.", - "", - "", - "binary/octet-stream", - L"download", - L"evil-" - }, - { - __LINE__, - "http://www.goodguy.com/. . . . .", - "filename=. . . . .", - "", - "", - "binary/octet-stream", - L"download", - L"download" - }, - { - __LINE__, - "http://www.badguy.com/attachment?name=meh.exe%C2%A0", - "attachment; filename=\"meh.exe\xC2\xA0\"", - "", - "", - "binary/octet-stream", - L"", - L"meh.exe-" - }, -#endif // OS_WIN - { - __LINE__, - "http://www.goodguy.com/utils.js", - "filename=utils.js", - "", - "", - "application/x-javascript", - L"download", - L"utils.js" - }, - { - __LINE__, - "http://www.goodguy.com/contacts.js", - "filename=contacts.js", - "", - "", - "application/json", - L"download", - L"contacts.js" - }, - { - __LINE__, - "http://www.goodguy.com/utils.js", - "filename=utils.js", - "", - "", - "text/javascript", - L"download", - L"utils.js" - }, - { - __LINE__, - "http://www.goodguy.com/utils.js", - "filename=utils.js", - "", - "", - "text/javascript;version=2", - L"download", - L"utils.js" - }, - { - __LINE__, - "http://www.goodguy.com/utils.js", - "filename=utils.js", - "", - "", - "application/ecmascript", - L"download", - L"utils.js" - }, - { - __LINE__, - "http://www.goodguy.com/utils.js", - "filename=utils.js", - "", - "", - "application/ecmascript;version=4", - L"download", - L"utils.js" - }, - { - __LINE__, - "http://www.goodguy.com/program.exe", - "filename=program.exe", - "", - "", - "application/foo-bar", - L"download", - L"program.exe" - }, - { - __LINE__, - "http://www.evil.com/../foo.txt", - "filename=../foo.txt", - "", - "", - "text/plain", - L"download", - L"-foo.txt" - }, - { - __LINE__, - "http://www.evil.com/..\\foo.txt", - "filename=..\\foo.txt", - "", - "", - "text/plain", - L"download", - L"-foo.txt" - }, - { - __LINE__, - "http://www.evil.com/.hidden", - "filename=.hidden", - "", - "", - "text/plain", - L"download", - L"hidden" TXT_EXT - }, - { - __LINE__, - "http://www.evil.com/trailing.", - "filename=trailing.", - "", - "", - "dance/party", - L"download", -#if defined(OS_WIN) - L"trailing-" -#else - L"trailing" -#endif - }, - { - __LINE__, - "http://www.evil.com/trailing.", - "filename=trailing.", - "", - "", - "text/plain", - L"download", -#if defined(OS_WIN) - L"trailing-" TXT_EXT -#else - L"trailing" TXT_EXT -#endif - }, - { - __LINE__, - "http://www.evil.com/.", - "filename=.", - "", - "", - "dance/party", - L"download", - L"download" - }, - { - __LINE__, - "http://www.evil.com/..", - "filename=..", - "", - "", - "dance/party", - L"download", - L"download" - }, - { - __LINE__, - "http://www.evil.com/...", - "filename=...", - "", - "", - "dance/party", - L"download", - L"download" - }, - { // Note that this one doesn't have "filename=" on it. - __LINE__, - "http://www.evil.com/", - "a_file_name.txt", - "", - "", - "image/jpeg", - L"download", - L"download" JPEG_EXT - }, - { - __LINE__, - "http://www.evil.com/", - "filename=", - "", - "", - "image/jpeg", - L"download", - L"download" JPEG_EXT - }, - { - __LINE__, - "http://www.example.com/simple", - "filename=simple", - "", - "", - "application/octet-stream", - L"download", - L"simple" - }, - // Reserved words on Windows - { - __LINE__, - "http://www.goodguy.com/COM1", - "filename=COM1", - "", - "", - "application/foo-bar", - L"download", -#if defined(OS_WIN) - L"_COM1" -#else - L"COM1" -#endif - }, - { - __LINE__, - "http://www.goodguy.com/COM4.txt", - "filename=COM4.txt", - "", - "", - "text/plain", - L"download", -#if defined(OS_WIN) - L"_COM4.txt" -#else - L"COM4.txt" -#endif - }, - { - __LINE__, - "http://www.goodguy.com/lpt1.TXT", - "filename=lpt1.TXT", - "", - "", - "text/plain", - L"download", -#if defined(OS_WIN) - L"_lpt1.TXT" -#else - L"lpt1.TXT" -#endif - }, - { - __LINE__, - "http://www.goodguy.com/clock$.txt", - "filename=clock$.txt", - "", - "", - "text/plain", - L"download", -#if defined(OS_WIN) - L"_clock$.txt" -#else - L"clock$.txt" -#endif - }, - { // Validation should also apply to sugested name - __LINE__, - "http://www.goodguy.com/blah$.txt", - "filename=clock$.txt", - "", - "clock$.txt", - "text/plain", - L"download", -#if defined(OS_WIN) - L"_clock$.txt" -#else - L"clock$.txt" -#endif - }, - { - __LINE__, - "http://www.goodguy.com/mycom1.foo", - "filename=mycom1.foo", - "", - "", - "text/plain", - L"download", - L"mycom1.foo" - }, - { - __LINE__, - "http://www.badguy.com/Setup.exe.local", - "filename=Setup.exe.local", - "", - "", - "application/foo-bar", - L"download", -#if defined(OS_WIN) - L"Setup.exe.download" -#else - L"Setup.exe.local" -#endif - }, - { - __LINE__, - "http://www.badguy.com/Setup.exe.local", - "filename=Setup.exe.local.local", - "", - "", - "application/foo-bar", - L"download", -#if defined(OS_WIN) - L"Setup.exe.local.download" -#else - L"Setup.exe.local.local" -#endif - }, - { - __LINE__, - "http://www.badguy.com/Setup.exe.lnk", - "filename=Setup.exe.lnk", - "", - "", - "application/foo-bar", - L"download", -#if defined(OS_WIN) - L"Setup.exe.download" -#else - L"Setup.exe.lnk" -#endif - }, - { - __LINE__, - "http://www.badguy.com/Desktop.ini", - "filename=Desktop.ini", - "", - "", - "application/foo-bar", - L"download", -#if defined(OS_WIN) - L"_Desktop.ini" -#else - L"Desktop.ini" -#endif - }, - { - __LINE__, - "http://www.badguy.com/Thumbs.db", - "filename=Thumbs.db", - "", - "", - "application/foo-bar", - L"download", -#if defined(OS_WIN) - L"_Thumbs.db" -#else - L"Thumbs.db" -#endif - }, - { - __LINE__, - "http://www.hotmail.com", - "filename=source.jpg", - "", - "", - "application/x-javascript", - L"download", - L"source.jpg" - }, - { // http://crbug.com/5772. - __LINE__, - "http://www.example.com/foo.tar.gz", - "", - "", - "", - "application/x-tar", - L"download", - L"foo.tar.gz" - }, - { // http://crbug.com/52250. - __LINE__, - "http://www.example.com/foo.tgz", - "", - "", - "", - "application/x-tar", - L"download", - L"foo.tgz" - }, - { // http://crbug.com/7337. - __LINE__, - "http://maged.lordaeron.org/blank.reg", - "", - "", - "", - "text/x-registry", - L"download", - L"blank.reg" - }, - { - __LINE__, - "http://www.example.com/bar.tar", - "", - "", - "", - "application/x-tar", - L"download", - L"bar.tar" - }, - { - __LINE__, - "http://www.example.com/bar.bogus", - "", - "", - "", - "application/x-tar", - L"download", - L"bar.bogus" - }, - { // http://crbug.com/20337 - __LINE__, - "http://www.example.com/.download.txt", - "filename=.download.txt", - "", - "", - "text/plain", - L"-download", - L"download.txt" - }, - { // http://crbug.com/56855. - __LINE__, - "http://www.example.com/bar.sh", - "", - "", - "", - "application/x-sh", - L"download", - L"bar.sh" - }, - { // http://crbug.com/61571 - __LINE__, - "http://www.example.com/npdf.php?fn=foobar.pdf", - "", - "", - "", - "text/plain", - L"download", - L"npdf" TXT_EXT - }, - { // Shouldn't overwrite C-D specified extension. - __LINE__, - "http://www.example.com/npdf.php?fn=foobar.pdf", - "filename=foobar.jpg", - "", - "", - "text/plain", - L"download", - L"foobar.jpg" - }, - { // http://crbug.com/87719 - __LINE__, - "http://www.example.com/image.aspx?id=blargh", - "", - "", - "", - "image/jpeg", - L"download", - L"image" JPEG_EXT - }, -#if defined(OS_CHROMEOS) - { // http://crosbug.com/26028 - __LINE__, - "http://www.example.com/fooa%cc%88.txt", - "", - "", - "", - "image/jpeg", - L"foo\xe4", - L"foo\xe4.txt" - }, -#endif - }; - - for (size_t i = 0; i < ARRAYSIZE_UNSAFE(selection_tests); ++i) - RunGenerateFileNameTestCase(&selection_tests[i]); - - for (size_t i = 0; i < ARRAYSIZE_UNSAFE(generation_tests); ++i) - RunGenerateFileNameTestCase(&generation_tests[i]); - - for (size_t i = 0; i < ARRAYSIZE_UNSAFE(generation_tests); ++i) { - GenerateFilenameCase test_case = generation_tests[i]; - test_case.referrer_charset = "GBK"; - RunGenerateFileNameTestCase(&test_case); - } -} - -} // namespace net diff --git a/net/base/net_util.cc b/net/base/net_util.cc index bafc835..0174c65 100644 --- a/net/base/net_util.cc +++ b/net/base/net_util.cc @@ -4,12 +4,9 @@ #include "net/base/net_util.h" -#include <errno.h> - #include <algorithm> #include <iterator> #include <map> -#include <set> #include "build/build_config.h" @@ -29,6 +26,10 @@ #endif #include "base/basictypes.h" +#include "base/file_util.h" +#include "base/files/file_path.h" +#include "base/i18n/file_util_icu.h" +#include "base/i18n/icu_string_conversions.h" #include "base/i18n/time_formatting.h" #include "base/json/string_escape.h" #include "base/lazy_instance.h" @@ -36,6 +37,7 @@ #include "base/memory/singleton.h" #include "base/message_loop/message_loop.h" #include "base/metrics/histogram.h" +#include "base/path_service.h" #include "base/stl_util.h" #include "base/strings/string_number_conversions.h" #include "base/strings/string_piece.h" @@ -60,6 +62,7 @@ #endif #include "net/base/dns_util.h" #include "net/base/escape.h" +#include "net/base/mime_util.h" #include "net/base/net_module.h" #include "net/base/registry_controlled_domains/registry_controlled_domain.h" #if defined(OS_WIN) @@ -82,6 +85,10 @@ namespace { typedef std::vector<size_t> Offsets; +// what we prepend to get a file URL +static const base::FilePath::CharType kFileURLPrefix[] = + FILE_PATH_LITERAL("file:///"); + // The general list of blocked ports. Will be blocked unless a specific // protocol overrides it. (Ex: ftp can use ports 20 and 21) static const int kRestrictedPorts[] = { @@ -749,6 +756,205 @@ void AppendFormattedComponent(const std::string& spec, } } +void SanitizeGeneratedFileName(base::FilePath::StringType* filename, + bool replace_trailing) { + const base::FilePath::CharType kReplace[] = FILE_PATH_LITERAL("-"); + if (filename->empty()) + return; + if (replace_trailing) { + // Handle CreateFile() stripping trailing dots and spaces on filenames + // http://support.microsoft.com/kb/115827 + size_t length = filename->size(); + size_t pos = filename->find_last_not_of(FILE_PATH_LITERAL(" .")); + filename->resize((pos == std::string::npos) ? 0 : (pos + 1)); + base::TrimWhitespace(*filename, base::TRIM_TRAILING, filename); + if (filename->empty()) + return; + size_t trimmed = length - filename->size(); + if (trimmed) + filename->insert(filename->end(), trimmed, kReplace[0]); + } + base::TrimString(*filename, FILE_PATH_LITERAL("."), filename); + if (filename->empty()) + return; + // Replace any path information by changing path separators. + ReplaceSubstringsAfterOffset(filename, 0, FILE_PATH_LITERAL("/"), kReplace); + ReplaceSubstringsAfterOffset(filename, 0, FILE_PATH_LITERAL("\\"), kReplace); +} + +// Returns the filename determined from the last component of the path portion +// of the URL. Returns an empty string if the URL doesn't have a path or is +// invalid. If the generated filename is not reliable, +// |should_overwrite_extension| will be set to true, in which case a better +// extension should be determined based on the content type. +std::string GetFileNameFromURL(const GURL& url, + const std::string& referrer_charset, + bool* should_overwrite_extension) { + // about: and data: URLs don't have file names, but esp. data: URLs may + // contain parts that look like ones (i.e., contain a slash). Therefore we + // don't attempt to divine a file name out of them. + if (!url.is_valid() || url.SchemeIs("about") || url.SchemeIs("data")) + return std::string(); + + const std::string unescaped_url_filename = UnescapeURLComponent( + url.ExtractFileName(), + UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS); + + // The URL's path should be escaped UTF-8, but may not be. + std::string decoded_filename = unescaped_url_filename; + if (!IsStringUTF8(decoded_filename)) { + // TODO(jshin): this is probably not robust enough. To be sure, we need + // encoding detection. + base::string16 utf16_output; + if (!referrer_charset.empty() && + base::CodepageToUTF16(unescaped_url_filename, + referrer_charset.c_str(), + base::OnStringConversionError::FAIL, + &utf16_output)) { + decoded_filename = base::UTF16ToUTF8(utf16_output); + } else { + decoded_filename = base::WideToUTF8( + base::SysNativeMBToWide(unescaped_url_filename)); + } + } + // If the URL contains a (possibly empty) query, assume it is a generator, and + // allow the determined extension to be overwritten. + *should_overwrite_extension = !decoded_filename.empty() && url.has_query(); + + return decoded_filename; +} + +// Returns whether the specified extension is automatically integrated into the +// windows shell. +bool IsShellIntegratedExtension(const base::FilePath::StringType& extension) { + base::FilePath::StringType extension_lower = StringToLowerASCII(extension); + + // http://msdn.microsoft.com/en-us/library/ms811694.aspx + // Right-clicking on shortcuts can be magical. + if ((extension_lower == FILE_PATH_LITERAL("local")) || + (extension_lower == FILE_PATH_LITERAL("lnk"))) + return true; + + // http://www.juniper.net/security/auto/vulnerabilities/vuln2612.html + // Files become magical if they end in a CLSID, so block such extensions. + if (!extension_lower.empty() && + (extension_lower[0] == FILE_PATH_LITERAL('{')) && + (extension_lower[extension_lower.length() - 1] == FILE_PATH_LITERAL('}'))) + return true; + return false; +} + +// Returns whether the specified file name is a reserved name on windows. +// This includes names like "com2.zip" (which correspond to devices) and +// desktop.ini and thumbs.db which have special meaning to the windows shell. +bool IsReservedName(const base::FilePath::StringType& filename) { + // This list is taken from the MSDN article "Naming a file" + // http://msdn2.microsoft.com/en-us/library/aa365247(VS.85).aspx + // I also added clock$ because GetSaveFileName seems to consider it as a + // reserved name too. + static const char* const known_devices[] = { + "con", "prn", "aux", "nul", "com1", "com2", "com3", "com4", "com5", + "com6", "com7", "com8", "com9", "lpt1", "lpt2", "lpt3", "lpt4", + "lpt5", "lpt6", "lpt7", "lpt8", "lpt9", "clock$" + }; +#if defined(OS_WIN) + std::string filename_lower = StringToLowerASCII(base::WideToUTF8(filename)); +#elif defined(OS_POSIX) + std::string filename_lower = StringToLowerASCII(filename); +#endif + + for (size_t i = 0; i < arraysize(known_devices); ++i) { + // Exact match. + if (filename_lower == known_devices[i]) + return true; + // Starts with "DEVICE.". + if (filename_lower.find(std::string(known_devices[i]) + ".") == 0) + return true; + } + + static const char* const magic_names[] = { + // These file names are used by the "Customize folder" feature of the shell. + "desktop.ini", + "thumbs.db", + }; + + for (size_t i = 0; i < arraysize(magic_names); ++i) { + if (filename_lower == magic_names[i]) + return true; + } + + return false; +} + +// Examines the current extension in |file_name| and modifies it if necessary in +// order to ensure the filename is safe. If |file_name| doesn't contain an +// extension or if |ignore_extension| is true, then a new extension will be +// constructed based on the |mime_type|. +// +// We're addressing two things here: +// +// 1) Usability. If there is no reliable file extension, we want to guess a +// reasonable file extension based on the content type. +// +// 2) Shell integration. Some file extensions automatically integrate with the +// shell. We block these extensions to prevent a malicious web site from +// integrating with the user's shell. +void EnsureSafeExtension(const std::string& mime_type, + bool ignore_extension, + base::FilePath* file_name) { + // See if our file name already contains an extension. + base::FilePath::StringType extension = file_name->Extension(); + if (!extension.empty()) + extension.erase(extension.begin()); // Erase preceding '.'. + + if ((ignore_extension || extension.empty()) && !mime_type.empty()) { + base::FilePath::StringType preferred_mime_extension; + std::vector<base::FilePath::StringType> all_mime_extensions; + // The GetPreferredExtensionForMimeType call will end up going to disk. Do + // this on another thread to avoid slowing the IO thread. + // http://crbug.com/61827 + // TODO(asanka): Remove this ScopedAllowIO once all callers have switched + // over to IO safe threads. + base::ThreadRestrictions::ScopedAllowIO allow_io; + net::GetPreferredExtensionForMimeType(mime_type, &preferred_mime_extension); + net::GetExtensionsForMimeType(mime_type, &all_mime_extensions); + // If the existing extension is in the list of valid extensions for the + // given type, use it. This avoids doing things like pointlessly renaming + // "foo.jpg" to "foo.jpeg". + if (std::find(all_mime_extensions.begin(), + all_mime_extensions.end(), + extension) != all_mime_extensions.end()) { + // leave |extension| alone + } else if (!preferred_mime_extension.empty()) { + extension = preferred_mime_extension; + } + } + +#if defined(OS_WIN) + static const base::FilePath::CharType default_extension[] = + FILE_PATH_LITERAL("download"); + + // Rename shell-integrated extensions. + // TODO(asanka): Consider stripping out the bad extension and replacing it + // with the preferred extension for the MIME type if one is available. + if (IsShellIntegratedExtension(extension)) + extension.assign(default_extension); +#endif + + *file_name = file_name->ReplaceExtension(extension); +} + +bool FilePathToString16(const base::FilePath& path, base::string16* converted) { +#if defined(OS_WIN) + return base::WideToUTF16( + path.value().c_str(), path.value().size(), converted); +#elif defined(OS_POSIX) + std::string component8 = path.AsUTF8Unsafe(); + return !component8.empty() && + base::UTF8ToUTF16(component8.c_str(), component8.size(), converted); +#endif +} + bool IPNumberPrefixCheck(const IPAddressNumber& ip_number, const unsigned char* ip_prefix, size_t prefix_length_in_bits) { @@ -787,6 +993,46 @@ size_t GetCountOfExplicitlyAllowedPorts() { return g_explicitly_allowed_ports.Get().size(); } +GURL FilePathToFileURL(const base::FilePath& path) { + // Produce a URL like "file:///C:/foo" for a regular file, or + // "file://///server/path" for UNC. The URL canonicalizer will fix up the + // latter case to be the canonical UNC form: "file://server/path" + base::FilePath::StringType url_string(kFileURLPrefix); + if (!path.IsAbsolute()) { + base::FilePath current_dir; + PathService::Get(base::DIR_CURRENT, ¤t_dir); + url_string.append(current_dir.value()); + url_string.push_back(base::FilePath::kSeparators[0]); + } + url_string.append(path.value()); + + // Now do replacement of some characters. Since we assume the input is a + // literal filename, anything the URL parser might consider special should + // be escaped here. + + // must be the first substitution since others will introduce percents as the + // escape character + ReplaceSubstringsAfterOffset(&url_string, 0, + FILE_PATH_LITERAL("%"), FILE_PATH_LITERAL("%25")); + + // semicolon is supposed to be some kind of separator according to RFC 2396 + ReplaceSubstringsAfterOffset(&url_string, 0, + FILE_PATH_LITERAL(";"), FILE_PATH_LITERAL("%3B")); + + ReplaceSubstringsAfterOffset(&url_string, 0, + FILE_PATH_LITERAL("#"), FILE_PATH_LITERAL("%23")); + + ReplaceSubstringsAfterOffset(&url_string, 0, + FILE_PATH_LITERAL("?"), FILE_PATH_LITERAL("%3F")); + +#if defined(OS_POSIX) + ReplaceSubstringsAfterOffset(&url_string, 0, + FILE_PATH_LITERAL("\\"), FILE_PATH_LITERAL("%5C")); +#endif + + return GURL(url_string); +} + std::string GetSpecificHeader(const std::string& headers, const std::string& name) { // We want to grab the Value from the "Key: Value" pairs in the headers, @@ -955,6 +1201,166 @@ base::string16 StripWWWFromHost(const GURL& url) { return StripWWW(base::ASCIIToUTF16(url.host())); } +bool IsSafePortablePathComponent(const base::FilePath& component) { + base::string16 component16; + base::FilePath::StringType sanitized = component.value(); + SanitizeGeneratedFileName(&sanitized, true); + base::FilePath::StringType extension = component.Extension(); + if (!extension.empty()) + extension.erase(extension.begin()); // Erase preceding '.'. + return !component.empty() && + (component == component.BaseName()) && + (component == component.StripTrailingSeparators()) && + FilePathToString16(component, &component16) && + file_util::IsFilenameLegal(component16) && + !IsShellIntegratedExtension(extension) && + (sanitized == component.value()) && + !IsReservedName(component.value()); +} + +bool IsSafePortableRelativePath(const base::FilePath& path) { + if (path.empty() || path.IsAbsolute() || path.EndsWithSeparator()) + return false; + std::vector<base::FilePath::StringType> components; + path.GetComponents(&components); + if (components.empty()) + return false; + for (size_t i = 0; i < components.size() - 1; ++i) { + if (!IsSafePortablePathComponent(base::FilePath(components[i]))) + return false; + } + return IsSafePortablePathComponent(path.BaseName()); +} + +void GenerateSafeFileName(const std::string& mime_type, + bool ignore_extension, + base::FilePath* file_path) { + // Make sure we get the right file extension + EnsureSafeExtension(mime_type, ignore_extension, file_path); + +#if defined(OS_WIN) + // Prepend "_" to the file name if it's a reserved name + base::FilePath::StringType leaf_name = file_path->BaseName().value(); + DCHECK(!leaf_name.empty()); + if (IsReservedName(leaf_name)) { + leaf_name = base::FilePath::StringType(FILE_PATH_LITERAL("_")) + leaf_name; + *file_path = file_path->DirName(); + if (file_path->value() == base::FilePath::kCurrentDirectory) { + *file_path = base::FilePath(leaf_name); + } else { + *file_path = file_path->Append(leaf_name); + } + } +#endif +} + +base::string16 GetSuggestedFilename(const GURL& url, + const std::string& content_disposition, + const std::string& referrer_charset, + const std::string& suggested_name, + const std::string& mime_type, + const std::string& default_name) { + // TODO: this function to be updated to match the httpbis recommendations. + // Talk to abarth for the latest news. + + // We don't translate this fallback string, "download". If localization is + // needed, the caller should provide localized fallback in |default_name|. + static const base::FilePath::CharType kFinalFallbackName[] = + FILE_PATH_LITERAL("download"); + std::string filename; // In UTF-8 + bool overwrite_extension = false; + + // Try to extract a filename from content-disposition first. + if (!content_disposition.empty()) { + HttpContentDisposition header(content_disposition, referrer_charset); + filename = header.filename(); + } + + // Then try to use the suggested name. + if (filename.empty() && !suggested_name.empty()) + filename = suggested_name; + + // Now try extracting the filename from the URL. GetFileNameFromURL() only + // looks at the last component of the URL and doesn't return the hostname as a + // failover. + if (filename.empty()) + filename = GetFileNameFromURL(url, referrer_charset, &overwrite_extension); + + // Finally try the URL hostname, but only if there's no default specified in + // |default_name|. Some schemes (e.g.: file:, about:, data:) do not have a + // host name. + if (filename.empty() && + default_name.empty() && + url.is_valid() && + !url.host().empty()) { + // TODO(jungshik) : Decode a 'punycoded' IDN hostname. (bug 1264451) + filename = url.host(); + } + + bool replace_trailing = false; + base::FilePath::StringType result_str, default_name_str; +#if defined(OS_WIN) + replace_trailing = true; + result_str = base::UTF8ToUTF16(filename); + default_name_str = base::UTF8ToUTF16(default_name); +#else + result_str = filename; + default_name_str = default_name; +#endif + SanitizeGeneratedFileName(&result_str, replace_trailing); + if (result_str.find_last_not_of(FILE_PATH_LITERAL("-_")) == + base::FilePath::StringType::npos) { + result_str = !default_name_str.empty() ? default_name_str : + base::FilePath::StringType(kFinalFallbackName); + overwrite_extension = false; + } + file_util::ReplaceIllegalCharactersInPath(&result_str, '-'); + base::FilePath result(result_str); + GenerateSafeFileName(mime_type, overwrite_extension, &result); + + base::string16 result16; + if (!FilePathToString16(result, &result16)) { + result = base::FilePath(default_name_str); + if (!FilePathToString16(result, &result16)) { + result = base::FilePath(kFinalFallbackName); + FilePathToString16(result, &result16); + } + } + return result16; +} + +base::FilePath GenerateFileName(const GURL& url, + const std::string& content_disposition, + const std::string& referrer_charset, + const std::string& suggested_name, + const std::string& mime_type, + const std::string& default_file_name) { + base::string16 file_name = GetSuggestedFilename(url, + content_disposition, + referrer_charset, + suggested_name, + mime_type, + default_file_name); + +#if defined(OS_WIN) + base::FilePath generated_name(file_name); +#else + base::FilePath generated_name( + base::SysWideToNativeMB(base::UTF16ToWide(file_name))); +#endif + +#if defined(OS_CHROMEOS) + // When doing file manager operations on ChromeOS, the file paths get + // normalized in WebKit layer, so let's ensure downloaded files have + // normalized names. Otherwise, we won't be able to handle files with NFD + // utf8 encoded characters in name. + file_util::NormalizeFileNameEncoding(&generated_name); +#endif + + DCHECK(!generated_name.empty()); + + return generated_name; +} bool IsPortAllowedByDefault(int port) { int array_size = arraysize(kRestrictedPorts); diff --git a/net/base/net_util.h b/net/base/net_util.h index fdf103b..79116ce 100644 --- a/net/base/net_util.h +++ b/net/base/net_util.h @@ -28,6 +28,7 @@ class GURL; namespace base { +class FilePath; class Time; } @@ -75,6 +76,16 @@ NET_EXPORT extern const FormatUrlType kFormatUrlOmitAll; // Returns the number of explicitly allowed ports; for testing. NET_EXPORT_PRIVATE extern size_t GetCountOfExplicitlyAllowedPorts(); +// Given the full path to a file name, creates a file: URL. The returned URL +// may not be valid if the input is malformed. +NET_EXPORT GURL FilePathToFileURL(const base::FilePath& path); + +// Converts a file: URL back to a filename that can be passed to the OS. The +// file URL must be well-formed (GURL::is_valid() must return true); we don't +// handle degenerate cases here. Returns true on success, false if it isn't a +// valid file URL. On failure, *file_path will be empty. +NET_EXPORT bool FileURLToFilePath(const GURL& url, base::FilePath* file_path); + // Splits an input of the form <host>[":"<port>] into its consitituent parts. // Saves the result into |*host| and |*port|. If the input did not have // the optional port, sets |*port| to -1. @@ -244,6 +255,89 @@ NET_EXPORT base::string16 StripWWW(const base::string16& text); // Runs |url|'s host through StripWWW(). |url| must be valid. NET_EXPORT base::string16 StripWWWFromHost(const GURL& url); +// Generates a filename using the first successful method from the following (in +// order): +// +// 1) The raw Content-Disposition header in |content_disposition| as read from +// the network. |referrer_charset| is used to decode non-ASCII strings. +// 2) |suggested_name| if specified. |suggested_name| is assumed to be in +// UTF-8. +// 3) The filename extracted from the |url|. |referrer_charset| will be used to +// interpret the URL if there are non-ascii characters. +// 4) |default_name|. If non-empty, |default_name| is assumed to be a filename +// and shouldn't contain a path. |default_name| is not subject to validation +// or sanitization, and therefore shouldn't be a user supplied string. +// 5) The hostname portion from the |url| +// +// Then, leading and trailing '.'s will be removed. On Windows, trailing spaces +// are also removed. The string "download" is the final fallback if no filename +// is found or the filename is empty. +// +// Any illegal characters in the filename will be replaced by '-'. If the +// filename doesn't contain an extension, and a |mime_type| is specified, the +// preferred extension for the |mime_type| will be appended to the filename. +// The resulting filename is then checked against a list of reserved names on +// Windows. If the name is reserved, an underscore will be prepended to the +// filename. +// +// Note: |mime_type| should only be specified if this function is called from a +// thread that allows IO. +NET_EXPORT base::string16 GetSuggestedFilename( + const GURL& url, + const std::string& content_disposition, + const std::string& referrer_charset, + const std::string& suggested_name, + const std::string& mime_type, + const std::string& default_name); + +// Similar to GetSuggestedFilename(), but returns a FilePath. +NET_EXPORT base::FilePath GenerateFileName( + const GURL& url, + const std::string& content_disposition, + const std::string& referrer_charset, + const std::string& suggested_name, + const std::string& mime_type, + const std::string& default_name); + +// Valid components: +// * are not empty +// * are not Windows reserved names (CON, NUL.zip, etc.) +// * do not have trailing separators +// * do not equal kCurrentDirectory +// * do not reference the parent directory +// * do not contain illegal characters +// * do not end with Windows shell-integrated extensions (even on posix) +// * do not begin with '.' (which would hide them in most file managers) +// * do not end with ' ' or '.' +NET_EXPORT bool IsSafePortablePathComponent(const base::FilePath& component); + +// Basenames of valid relative paths are IsSafePortableBasename(), and internal +// path components of valid relative paths are valid path components as +// described above IsSafePortableBasename(). Valid relative paths are not +// absolute paths. +NET_EXPORT bool IsSafePortableRelativePath(const base::FilePath& path); + +// Ensures that the filename and extension is safe to use in the filesystem. +// +// Assumes that |file_path| already contains a valid path or file name. On +// Windows if the extension causes the file to have an unsafe interaction with +// the shell (see net_util::IsShellIntegratedExtension()), then it will be +// replaced by the string 'download'. If |file_path| doesn't contain an +// extension or |ignore_extension| is true then the preferred extension, if one +// exists, for |mime_type| will be used as the extension. +// +// On Windows, the filename will be checked against a set of reserved names, and +// if so, an underscore will be prepended to the name. +// +// |file_name| can either be just the file name or it can be a full path to a +// file. +// +// Note: |mime_type| should only be non-empty if this function is called from a +// thread that allows IO. +NET_EXPORT void GenerateSafeFileName(const std::string& mime_type, + bool ignore_extension, + base::FilePath* file_path); + // Checks |port| against a list of ports which are restricted by default. // Returns true if |port| is allowed, false if it is restricted. NET_EXPORT bool IsPortAllowedByDefault(int port); diff --git a/net/base/net_util_posix.cc b/net/base/net_util_posix.cc index 080f9b0..95b963e 100644 --- a/net/base/net_util_posix.cc +++ b/net/base/net_util_posix.cc @@ -85,6 +85,40 @@ void RemovePermanentIPv6AddressesWhereTemporaryExists( } // namespace +bool FileURLToFilePath(const GURL& url, base::FilePath* path) { + *path = base::FilePath(); + std::string& file_path_str = const_cast<std::string&>(path->value()); + file_path_str.clear(); + + if (!url.is_valid()) + return false; + + // Firefox seems to ignore the "host" of a file url if there is one. That is, + // file://foo/bar.txt maps to /bar.txt. + // TODO(dhg): This should probably take into account UNCs which could + // include a hostname other than localhost or blank + std::string old_path = url.path(); + + if (old_path.empty()) + return false; + + // GURL stores strings as percent-encoded 8-bit, this will undo if possible. + old_path = UnescapeURLComponent(old_path, + UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS); + + // Collapse multiple path slashes into a single path slash. + std::string new_path; + do { + new_path = old_path; + ReplaceSubstringsAfterOffset(&new_path, 0, "//", "/"); + old_path.swap(new_path); + } while (new_path != old_path); + + file_path_str.assign(old_path); + + return !file_path_str.empty(); +} + bool GetNetworkList(NetworkInterfaceList* networks, int policy) { #if defined(OS_ANDROID) std::string network_list = android::GetNetworkList(); diff --git a/net/base/net_util_unittest.cc b/net/base/net_util_unittest.cc index 98747e6..e62a644 100644 --- a/net/base/net_util_unittest.cc +++ b/net/base/net_util_unittest.cc @@ -17,6 +17,7 @@ #include "base/strings/sys_string_conversions.h" #include "base/strings/utf_string_conversions.h" #include "base/sys_byteorder.h" +#include "base/test/test_file_util.h" #include "base/time/time.h" #include "testing/gtest/include/gtest/gtest.h" #include "url/gurl.h" @@ -38,6 +39,11 @@ namespace { static const size_t kNpos = base::string16::npos; +struct FileCase { + const wchar_t* file; + const char* url; +}; + struct HeaderCase { const char* header_name; const char* expected; @@ -49,6 +55,12 @@ struct HeaderParamCase { const char* expected; }; +struct FileNameCDCase { + const char* header_field; + const char* referrer_charset; + const wchar_t* expected; +}; + const char* kLanguages[] = { "", "en", "zh-CN", "ja", "ko", "he", "ar", "ru", "el", "fr", @@ -384,6 +396,17 @@ struct CompliantHostCase { bool expected_output; }; +struct GenerateFilenameCase { + int lineno; + const char* url; + const char* content_disp_header; + const char* referrer_charset; + const char* suggested_filename; + const char* mime_type; + const wchar_t* default_filename; + const wchar_t* expected_filename; +}; + struct UrlTestData { const char* description; const char* input; @@ -472,8 +495,125 @@ std::string DumpIPNumber(const IPAddressNumber& v) { return out; } +void RunGenerateFileNameTestCase(const GenerateFilenameCase* test_case) { + std::string default_filename(base::WideToUTF8(test_case->default_filename)); + base::FilePath file_path = GenerateFileName( + GURL(test_case->url), test_case->content_disp_header, + test_case->referrer_charset, test_case->suggested_filename, + test_case->mime_type, default_filename); + EXPECT_EQ(test_case->expected_filename, + file_util::FilePathAsWString(file_path)) + << "test case at line number: " << test_case->lineno; +} + } // anonymous namespace +TEST(NetUtilTest, FileURLConversion) { + // a list of test file names and the corresponding URLs + const FileCase round_trip_cases[] = { +#if defined(OS_WIN) + {L"C:\\foo\\bar.txt", "file:///C:/foo/bar.txt"}, + {L"\\\\some computer\\foo\\bar.txt", + "file://some%20computer/foo/bar.txt"}, // UNC + {L"D:\\Name;with%some symbols*#", + "file:///D:/Name%3Bwith%25some%20symbols*%23"}, + // issue 14153: To be tested with the OS default codepage other than 1252. + {L"D:\\latin1\\caf\x00E9\x00DD.txt", + "file:///D:/latin1/caf%C3%A9%C3%9D.txt"}, + {L"D:\\otherlatin\\caf\x0119.txt", + "file:///D:/otherlatin/caf%C4%99.txt"}, + {L"D:\\greek\\\x03B1\x03B2\x03B3.txt", + "file:///D:/greek/%CE%B1%CE%B2%CE%B3.txt"}, + {L"D:\\Chinese\\\x6240\x6709\x4e2d\x6587\x7f51\x9875.doc", + "file:///D:/Chinese/%E6%89%80%E6%9C%89%E4%B8%AD%E6%96%87%E7%BD%91" + "%E9%A1%B5.doc"}, + {L"D:\\plane1\\\xD835\xDC00\xD835\xDC01.txt", // Math alphabet "AB" + "file:///D:/plane1/%F0%9D%90%80%F0%9D%90%81.txt"}, +#elif defined(OS_POSIX) + {L"/foo/bar.txt", "file:///foo/bar.txt"}, + {L"/foo/BAR.txt", "file:///foo/BAR.txt"}, + {L"/C:/foo/bar.txt", "file:///C:/foo/bar.txt"}, + {L"/foo/bar?.txt", "file:///foo/bar%3F.txt"}, + {L"/some computer/foo/bar.txt", "file:///some%20computer/foo/bar.txt"}, + {L"/Name;with%some symbols*#", "file:///Name%3Bwith%25some%20symbols*%23"}, + {L"/latin1/caf\x00E9\x00DD.txt", "file:///latin1/caf%C3%A9%C3%9D.txt"}, + {L"/otherlatin/caf\x0119.txt", "file:///otherlatin/caf%C4%99.txt"}, + {L"/greek/\x03B1\x03B2\x03B3.txt", "file:///greek/%CE%B1%CE%B2%CE%B3.txt"}, + {L"/Chinese/\x6240\x6709\x4e2d\x6587\x7f51\x9875.doc", + "file:///Chinese/%E6%89%80%E6%9C%89%E4%B8%AD%E6%96%87%E7%BD" + "%91%E9%A1%B5.doc"}, + {L"/plane1/\x1D400\x1D401.txt", // Math alphabet "AB" + "file:///plane1/%F0%9D%90%80%F0%9D%90%81.txt"}, +#endif + }; + + // First, we'll test that we can round-trip all of the above cases of URLs + base::FilePath output; + for (size_t i = 0; i < ARRAYSIZE_UNSAFE(round_trip_cases); i++) { + // convert to the file URL + GURL file_url(FilePathToFileURL( + file_util::WStringAsFilePath(round_trip_cases[i].file))); + EXPECT_EQ(round_trip_cases[i].url, file_url.spec()); + + // Back to the filename. + EXPECT_TRUE(FileURLToFilePath(file_url, &output)); + EXPECT_EQ(round_trip_cases[i].file, file_util::FilePathAsWString(output)); + } + + // Test that various file: URLs get decoded into the correct file type + FileCase url_cases[] = { +#if defined(OS_WIN) + {L"C:\\foo\\bar.txt", "file:c|/foo\\bar.txt"}, + {L"C:\\foo\\bar.txt", "file:/c:/foo/bar.txt"}, + {L"\\\\foo\\bar.txt", "file://foo\\bar.txt"}, + {L"C:\\foo\\bar.txt", "file:///c:/foo/bar.txt"}, + {L"\\\\foo\\bar.txt", "file:////foo\\bar.txt"}, + {L"\\\\foo\\bar.txt", "file:/foo/bar.txt"}, + {L"\\\\foo\\bar.txt", "file://foo\\bar.txt"}, + {L"C:\\foo\\bar.txt", "file:\\\\\\c:/foo/bar.txt"}, +#elif defined(OS_POSIX) + {L"/c:/foo/bar.txt", "file:/c:/foo/bar.txt"}, + {L"/c:/foo/bar.txt", "file:///c:/foo/bar.txt"}, + {L"/foo/bar.txt", "file:/foo/bar.txt"}, + {L"/c:/foo/bar.txt", "file:\\\\\\c:/foo/bar.txt"}, + {L"/foo/bar.txt", "file:foo/bar.txt"}, + {L"/bar.txt", "file://foo/bar.txt"}, + {L"/foo/bar.txt", "file:///foo/bar.txt"}, + {L"/foo/bar.txt", "file:////foo/bar.txt"}, + {L"/foo/bar.txt", "file:////foo//bar.txt"}, + {L"/foo/bar.txt", "file:////foo///bar.txt"}, + {L"/foo/bar.txt", "file:////foo////bar.txt"}, + {L"/c:/foo/bar.txt", "file:\\\\\\c:/foo/bar.txt"}, + {L"/c:/foo/bar.txt", "file:c:/foo/bar.txt"}, + // We get these wrong because GURL turns back slashes into forward + // slashes. + //{L"/foo%5Cbar.txt", "file://foo\\bar.txt"}, + //{L"/c|/foo%5Cbar.txt", "file:c|/foo\\bar.txt"}, + //{L"/foo%5Cbar.txt", "file://foo\\bar.txt"}, + //{L"/foo%5Cbar.txt", "file:////foo\\bar.txt"}, + //{L"/foo%5Cbar.txt", "file://foo\\bar.txt"}, +#endif + }; + for (size_t i = 0; i < ARRAYSIZE_UNSAFE(url_cases); i++) { + FileURLToFilePath(GURL(url_cases[i].url), &output); + EXPECT_EQ(url_cases[i].file, file_util::FilePathAsWString(output)); + } + + // Unfortunately, UTF8ToWide discards invalid UTF8 input. +#ifdef BUG_878908_IS_FIXED + // Test that no conversion happens if the UTF-8 input is invalid, and that + // the input is preserved in UTF-8 + const char invalid_utf8[] = "file:///d:/Blah/\xff.doc"; + const wchar_t invalid_wide[] = L"D:\\Blah\\\xff.doc"; + EXPECT_TRUE(FileURLToFilePath( + GURL(std::string(invalid_utf8)), &output)); + EXPECT_EQ(std::wstring(invalid_wide), output); +#endif + + // Test that if a file URL is malformed, we get a failure + EXPECT_FALSE(FileURLToFilePath(GURL("filefoobar"), &output)); +} + TEST(NetUtilTest, GetIdentityFromURL) { struct { const char* input_url; @@ -664,6 +804,1422 @@ TEST(NetUtilTest, StripWWW) { EXPECT_EQ(ASCIIToUTF16("blah"), StripWWW(ASCIIToUTF16("blah"))); } +#if defined(OS_WIN) +#define JPEG_EXT L".jpg" +#define HTML_EXT L".htm" +#elif defined(OS_MACOSX) +#define JPEG_EXT L".jpeg" +#define HTML_EXT L".html" +#else +#define JPEG_EXT L".jpg" +#define HTML_EXT L".html" +#endif +#define TXT_EXT L".txt" +#define TAR_EXT L".tar" + +TEST(NetUtilTest, GenerateSafeFileName) { + const struct { + const char* mime_type; + const base::FilePath::CharType* filename; + const base::FilePath::CharType* expected_filename; + } safe_tests[] = { +#if defined(OS_WIN) + { + "text/html", + FILE_PATH_LITERAL("C:\\foo\\bar.htm"), + FILE_PATH_LITERAL("C:\\foo\\bar.htm") + }, + { + "text/html", + FILE_PATH_LITERAL("C:\\foo\\bar.html"), + FILE_PATH_LITERAL("C:\\foo\\bar.html") + }, + { + "text/html", + FILE_PATH_LITERAL("C:\\foo\\bar"), + FILE_PATH_LITERAL("C:\\foo\\bar.htm") + }, + { + "image/png", + FILE_PATH_LITERAL("C:\\bar.html"), + FILE_PATH_LITERAL("C:\\bar.html") + }, + { + "image/png", + FILE_PATH_LITERAL("C:\\bar"), + FILE_PATH_LITERAL("C:\\bar.png") + }, + { + "text/html", + FILE_PATH_LITERAL("C:\\foo\\bar.exe"), + FILE_PATH_LITERAL("C:\\foo\\bar.exe") + }, + { + "image/gif", + FILE_PATH_LITERAL("C:\\foo\\bar.exe"), + FILE_PATH_LITERAL("C:\\foo\\bar.exe") + }, + { + "text/html", + FILE_PATH_LITERAL("C:\\foo\\google.com"), + FILE_PATH_LITERAL("C:\\foo\\google.com") + }, + { + "text/html", + FILE_PATH_LITERAL("C:\\foo\\con.htm"), + FILE_PATH_LITERAL("C:\\foo\\_con.htm") + }, + { + "text/html", + FILE_PATH_LITERAL("C:\\foo\\con"), + FILE_PATH_LITERAL("C:\\foo\\_con.htm") + }, + { + "text/html", + FILE_PATH_LITERAL("C:\\foo\\harmless.{not-really-this-may-be-a-guid}"), + FILE_PATH_LITERAL("C:\\foo\\harmless.download") + }, + { + "text/html", + FILE_PATH_LITERAL("C:\\foo\\harmless.local"), + FILE_PATH_LITERAL("C:\\foo\\harmless.download") + }, + { + "text/html", + FILE_PATH_LITERAL("C:\\foo\\harmless.lnk"), + FILE_PATH_LITERAL("C:\\foo\\harmless.download") + }, + { + "text/html", + FILE_PATH_LITERAL("C:\\foo\\harmless.{mismatched-"), + FILE_PATH_LITERAL("C:\\foo\\harmless.{mismatched-") + }, + // Allow extension synonyms. + { + "image/jpeg", + FILE_PATH_LITERAL("C:\\foo\\bar.jpg"), + FILE_PATH_LITERAL("C:\\foo\\bar.jpg") + }, + { + "image/jpeg", + FILE_PATH_LITERAL("C:\\foo\\bar.jpeg"), + FILE_PATH_LITERAL("C:\\foo\\bar.jpeg") + }, +#else // !defined(OS_WIN) + { + "text/html", + FILE_PATH_LITERAL("/foo/bar.htm"), + FILE_PATH_LITERAL("/foo/bar.htm") + }, + { + "text/html", + FILE_PATH_LITERAL("/foo/bar.html"), + FILE_PATH_LITERAL("/foo/bar.html") + }, + { + "text/html", + FILE_PATH_LITERAL("/foo/bar"), + FILE_PATH_LITERAL("/foo/bar.html") + }, + { + "image/png", + FILE_PATH_LITERAL("/bar.html"), + FILE_PATH_LITERAL("/bar.html") + }, + { + "image/png", + FILE_PATH_LITERAL("/bar"), + FILE_PATH_LITERAL("/bar.png") + }, + { + "image/gif", + FILE_PATH_LITERAL("/foo/bar.exe"), + FILE_PATH_LITERAL("/foo/bar.exe") + }, + { + "text/html", + FILE_PATH_LITERAL("/foo/google.com"), + FILE_PATH_LITERAL("/foo/google.com") + }, + { + "text/html", + FILE_PATH_LITERAL("/foo/con.htm"), + FILE_PATH_LITERAL("/foo/con.htm") + }, + { + "text/html", + FILE_PATH_LITERAL("/foo/con"), + FILE_PATH_LITERAL("/foo/con.html") + }, + // Allow extension synonyms. + { + "image/jpeg", + FILE_PATH_LITERAL("/bar.jpg"), + FILE_PATH_LITERAL("/bar.jpg") + }, + { + "image/jpeg", + FILE_PATH_LITERAL("/bar.jpeg"), + FILE_PATH_LITERAL("/bar.jpeg") + }, +#endif // !defined(OS_WIN) + }; + + for (size_t i = 0; i < ARRAYSIZE_UNSAFE(safe_tests); ++i) { + base::FilePath file_path(safe_tests[i].filename); + GenerateSafeFileName(safe_tests[i].mime_type, false, &file_path); + EXPECT_EQ(safe_tests[i].expected_filename, file_path.value()) + << "Iteration " << i; + } +} + +TEST(NetUtilTest, GenerateFileName) { +#if defined(OS_POSIX) && !defined(OS_MACOSX) && !defined(OS_ANDROID) + // This test doesn't run when the locale is not UTF-8 because some of the + // string conversions fail. This is OK (we have the default value) but they + // don't match our expectations. + std::string locale = setlocale(LC_CTYPE, NULL); + StringToLowerASCII(&locale); + EXPECT_TRUE(locale.find("utf-8") != std::string::npos || + locale.find("utf8") != std::string::npos) + << "Your locale (" << locale << ") must be set to UTF-8 " + << "for this test to pass!"; +#endif + + // Tests whether the correct filename is selected from the the given + // parameters and that Content-Disposition headers are properly + // handled including failovers when the header is malformed. + const GenerateFilenameCase selection_tests[] = { + { + __LINE__, + "http://www.google.com/", + "attachment; filename=test.html", + "", + "", + "", + L"", + L"test.html" + }, + { + __LINE__, + "http://www.google.com/", + "attachment; filename=\"test.html\"", + "", + "", + "", + L"", + L"test.html" + }, + { + __LINE__, + "http://www.google.com/", + "attachment; filename= \"test.html\"", + "", + "", + "", + L"", + L"test.html" + }, + { + __LINE__, + "http://www.google.com/", + "attachment; filename = \"test.html\"", + "", + "", + "", + L"", + L"test.html" + }, + { // filename is whitespace. Should failover to URL host + __LINE__, + "http://www.google.com/", + "attachment; filename= ", + "", + "", + "", + L"", + L"www.google.com" + }, + { // No filename. + __LINE__, + "http://www.google.com/path/test.html", + "attachment", + "", + "", + "", + L"", + L"test.html" + }, + { // Ditto + __LINE__, + "http://www.google.com/path/test.html", + "attachment;", + "", + "", + "", + L"", + L"test.html" + }, + { // No C-D + __LINE__, + "http://www.google.com/", + "", + "", + "", + "", + L"", + L"www.google.com" + }, + { + __LINE__, + "http://www.google.com/test.html", + "", + "", + "", + "", + L"", + L"test.html" + }, + { // Now that we use src/url's ExtractFileName, this case falls back to + // the hostname. If this behavior is not desirable, we'd better change + // ExtractFileName (in url_parse). + __LINE__, + "http://www.google.com/path/", + "", + "", + "", + "", + L"", + L"www.google.com" + }, + { + __LINE__, + "http://www.google.com/path", + "", + "", + "", + "", + L"", + L"path" + }, + { + __LINE__, + "file:///", + "", + "", + "", + "", + L"", + L"download" + }, + { + __LINE__, + "file:///path/testfile", + "", + "", + "", + "", + L"", + L"testfile" + }, + { + __LINE__, + "non-standard-scheme:", + "", + "", + "", + "", + L"", + L"download" + }, + { // C-D should override default + __LINE__, + "http://www.google.com/", + "attachment; filename =\"test.html\"", + "", + "", + "", + L"download", + L"test.html" + }, + { // But the URL shouldn't + __LINE__, + "http://www.google.com/", + "", + "", + "", + "", + L"download", + L"download" + }, + { + __LINE__, + "http://www.google.com/", + "attachment; filename=\"../test.html\"", + "", + "", + "", + L"", + L"-test.html" + }, + { + __LINE__, + "http://www.google.com/", + "attachment; filename=\"..\\test.html\"", + "", + "", + "", + L"", + L"test.html" + }, + { + __LINE__, + "http://www.google.com/", + "attachment; filename=\"..\\\\test.html\"", + "", + "", + "", + L"", + L"-test.html" + }, + { // Filename disappears after leading and trailing periods are removed. + __LINE__, + "http://www.google.com/", + "attachment; filename=\"..\"", + "", + "", + "", + L"default", + L"default" + }, + { // C-D specified filename disappears. Failover to final filename. + __LINE__, + "http://www.google.com/test.html", + "attachment; filename=\"..\"", + "", + "", + "", + L"default", + L"default" + }, + // Below is a small subset of cases taken from HttpContentDisposition tests. + { + __LINE__, + "http://www.google.com/", + "attachment; filename=\"%EC%98%88%EC%88%A0%20" + "%EC%98%88%EC%88%A0.jpg\"", + "", + "", + "", + L"", + L"\uc608\uc220 \uc608\uc220.jpg" + }, + { + __LINE__, + "http://www.google.com/%EC%98%88%EC%88%A0%20%EC%98%88%EC%88%A0.jpg", + "", + "", + "", + "", + L"download", + L"\uc608\uc220 \uc608\uc220.jpg" + }, + { + __LINE__, + "http://www.google.com/", + "attachment;", + "", + "", + "", + L"\uB2E4\uC6B4\uB85C\uB4DC", + L"\uB2E4\uC6B4\uB85C\uB4DC" + }, + { + __LINE__, + "http://www.google.com/", + "attachment; filename=\"=?EUC-JP?Q?=B7=DD=BD=" + "D13=2Epng?=\"", + "", + "", + "", + L"download", + L"\u82b8\u88533.png" + }, + { + __LINE__, + "http://www.example.com/images?id=3", + "attachment; filename=caf\xc3\xa9.png", + "iso-8859-1", + "", + "", + L"", + L"caf\u00e9.png" + }, + { + __LINE__, + "http://www.example.com/images?id=3", + "attachment; filename=caf\xe5.png", + "windows-1253", + "", + "", + L"", + L"caf\u03b5.png" + }, + { + __LINE__, + "http://www.example.com/file?id=3", + "attachment; name=\xcf\xc2\xd4\xd8.zip", + "GBK", + "", + "", + L"", + L"\u4e0b\u8f7d.zip" + }, + { // Invalid C-D header. Extracts filename from url. + __LINE__, + "http://www.google.com/test.html", + "attachment; filename==?iiso88591?Q?caf=EG?=", + "", + "", + "", + L"", + L"test.html" + }, + // about: and data: URLs + { + __LINE__, + "about:chrome", + "", + "", + "", + "", + L"", + L"download" + }, + { + __LINE__, + "data:,looks/like/a.path", + "", + "", + "", + "", + L"", + L"download" + }, + { + __LINE__, + "data:text/plain;base64,VG8gYmUgb3Igbm90IHRvIGJlLg=", + "", + "", + "", + "", + L"", + L"download" + }, + { + __LINE__, + "data:,looks/like/a.path", + "", + "", + "", + "", + L"default_filename_is_given", + L"default_filename_is_given" + }, + { + __LINE__, + "data:,looks/like/a.path", + "", + "", + "", + "", + L"\u65e5\u672c\u8a9e", // Japanese Kanji. + L"\u65e5\u672c\u8a9e" + }, + { // The filename encoding is specified by the referrer charset. + __LINE__, + "http://example.com/V%FDvojov%E1%20psychologie.doc", + "", + "iso-8859-1", + "", + "", + L"", + L"V\u00fdvojov\u00e1 psychologie.doc" + }, + { // Suggested filename takes precedence over URL + __LINE__, + "http://www.google.com/test", + "", + "", + "suggested", + "", + L"", + L"suggested" + }, + { // The content-disposition has higher precedence over the suggested name. + __LINE__, + "http://www.google.com/test", + "attachment; filename=test.html", + "", + "suggested", + "", + L"", + L"test.html" + }, +#if 0 + { // The filename encoding doesn't match the referrer charset, the system + // charset, or UTF-8. + // TODO(jshin): we need to handle this case. + __LINE__, + "http://example.com/V%FDvojov%E1%20psychologie.doc", + "", + "utf-8", + "", + "", + L"", + L"V\u00fdvojov\u00e1 psychologie.doc", + }, +#endif + // Raw 8bit characters in C-D + { + __LINE__, + "http://www.example.com/images?id=3", + "attachment; filename=caf\xc3\xa9.png", + "iso-8859-1", + "", + "image/png", + L"", + L"caf\u00e9.png" + }, + { + __LINE__, + "http://www.example.com/images?id=3", + "attachment; filename=caf\xe5.png", + "windows-1253", + "", + "image/png", + L"", + L"caf\u03b5.png" + }, + { // No 'filename' keyword in the disposition, use the URL + __LINE__, + "http://www.evil.com/my_download.txt", + "a_file_name.txt", + "", + "", + "text/plain", + L"download", + L"my_download.txt" + }, + { // Spaces in the disposition file name + __LINE__, + "http://www.frontpagehacker.com/a_download.exe", + "filename=My Downloaded File.exe", + "", + "", + "application/octet-stream", + L"download", + L"My Downloaded File.exe" + }, + { // % encoded + __LINE__, + "http://www.examples.com/", + "attachment; " + "filename=\"%EC%98%88%EC%88%A0%20%EC%98%88%EC%88%A0.jpg\"", + "", + "", + "image/jpeg", + L"download", + L"\uc608\uc220 \uc608\uc220.jpg" + }, + { // name= parameter + __LINE__, + "http://www.examples.com/q.cgi?id=abc", + "attachment; name=abc de.pdf", + "", + "", + "application/octet-stream", + L"download", + L"abc de.pdf" + }, + { + __LINE__, + "http://www.example.com/path", + "filename=\"=?EUC-JP?Q?=B7=DD=BD=D13=2Epng?=\"", + "", + "", + "image/png", + L"download", + L"\x82b8\x8853" L"3.png" + }, + { // The following two have invalid CD headers and filenames come from the + // URL. + __LINE__, + "http://www.example.com/test%20123", + "attachment; filename==?iiso88591?Q?caf=EG?=", + "", + "", + "image/jpeg", + L"download", + L"test 123" JPEG_EXT + }, + { + __LINE__, + "http://www.google.com/%EC%98%88%EC%88%A0%20%EC%98%88%EC%88%A0.jpg", + "malformed_disposition", + "", + "", + "image/jpeg", + L"download", + L"\uc608\uc220 \uc608\uc220.jpg" + }, + { // Invalid C-D. No filename from URL. Falls back to 'download'. + __LINE__, + "http://www.google.com/path1/path2/", + "attachment; filename==?iso88591?Q?caf=E3?", + "", + "", + "image/jpeg", + L"download", + L"download" JPEG_EXT + }, + }; + + // Tests filename generation. Once the correct filename is + // selected, they should be passed through the validation steps and + // a correct extension should be added if necessary. + const GenerateFilenameCase generation_tests[] = { + // Dotfiles. Ensures preceeding period(s) stripped. + { + __LINE__, + "http://www.google.com/.test.html", + "", + "", + "", + "", + L"", + L"test.html" + }, + { + __LINE__, + "http://www.google.com/.test", + "", + "", + "", + "", + L"", + L"test" + }, + { + __LINE__, + "http://www.google.com/..test", + "", + "", + "", + "", + L"", + L"test" + }, + { // Disposition has relative paths, remove directory separators + __LINE__, + "http://www.evil.com/my_download.txt", + "filename=../../../../././../a_file_name.txt", + "", + "", + "text/plain", + L"download", + L"-..-..-..-.-.-..-a_file_name.txt" + }, + { // Disposition has parent directories, remove directory separators + __LINE__, + "http://www.evil.com/my_download.txt", + "filename=dir1/dir2/a_file_name.txt", + "", + "", + "text/plain", + L"download", + L"dir1-dir2-a_file_name.txt" + }, + { // Disposition has relative paths, remove directory separators + __LINE__, + "http://www.evil.com/my_download.txt", + "filename=..\\..\\..\\..\\.\\.\\..\\a_file_name.txt", + "", + "", + "text/plain", + L"download", + L"-..-..-..-.-.-..-a_file_name.txt" + }, + { // Disposition has parent directories, remove directory separators + __LINE__, + "http://www.evil.com/my_download.txt", + "filename=dir1\\dir2\\a_file_name.txt", + "", + "", + "text/plain", + L"download", + L"dir1-dir2-a_file_name.txt" + }, + { // No useful information in disposition or URL, use default + __LINE__, + "http://www.truncated.com/path/", + "", + "", + "", + "text/plain", + L"download", + L"download" TXT_EXT + }, + { // Filename looks like HTML? + __LINE__, + "http://www.evil.com/get/malware/here", + "filename=\"<blink>Hello kitty</blink>\"", + "", + "", + "text/plain", + L"default", + L"-blink-Hello kitty--blink-" TXT_EXT + }, + { // A normal avi should get .avi and not .avi.avi + __LINE__, + "https://blah.google.com/misc/2.avi", + "", + "", + "", + "video/x-msvideo", + L"download", + L"2.avi" + }, + { // Extension generation + __LINE__, + "http://www.example.com/my-cat", + "filename=my-cat", + "", + "", + "image/jpeg", + L"download", + L"my-cat" JPEG_EXT + }, + { + __LINE__, + "http://www.example.com/my-cat", + "filename=my-cat", + "", + "", + "text/plain", + L"download", + L"my-cat.txt" + }, + { + __LINE__, + "http://www.example.com/my-cat", + "filename=my-cat", + "", + "", + "text/html", + L"download", + L"my-cat" HTML_EXT + }, + { // Unknown MIME type + __LINE__, + "http://www.example.com/my-cat", + "filename=my-cat", + "", + "", + "dance/party", + L"download", + L"my-cat" + }, + { + __LINE__, + "http://www.example.com/my-cat.jpg", + "filename=my-cat.jpg", + "", + "", + "text/plain", + L"download", + L"my-cat.jpg" + }, + // Windows specific tests +#if defined(OS_WIN) + { + __LINE__, + "http://www.goodguy.com/evil.exe", + "filename=evil.exe", + "", + "", + "image/jpeg", + L"download", + L"evil.exe" + }, + { + __LINE__, + "http://www.goodguy.com/ok.exe", + "filename=ok.exe", + "", + "", + "binary/octet-stream", + L"download", + L"ok.exe" + }, + { + __LINE__, + "http://www.goodguy.com/evil.dll", + "filename=evil.dll", + "", + "", + "dance/party", + L"download", + L"evil.dll" + }, + { + __LINE__, + "http://www.goodguy.com/evil.exe", + "filename=evil", + "", + "", + "application/rss+xml", + L"download", + L"evil" + }, + // Test truncation of trailing dots and spaces + { + __LINE__, + "http://www.goodguy.com/evil.exe ", + "filename=evil.exe ", + "", + "", + "binary/octet-stream", + L"download", + L"evil.exe" + }, + { + __LINE__, + "http://www.goodguy.com/evil.exe.", + "filename=evil.exe.", + "", + "", + "binary/octet-stream", + L"download", + L"evil.exe-" + }, + { + __LINE__, + "http://www.goodguy.com/evil.exe. . .", + "filename=evil.exe. . .", + "", + "", + "binary/octet-stream", + L"download", + L"evil.exe-------" + }, + { + __LINE__, + "http://www.goodguy.com/evil.", + "filename=evil.", + "", + "", + "binary/octet-stream", + L"download", + L"evil-" + }, + { + __LINE__, + "http://www.goodguy.com/. . . . .", + "filename=. . . . .", + "", + "", + "binary/octet-stream", + L"download", + L"download" + }, + { + __LINE__, + "http://www.badguy.com/attachment?name=meh.exe%C2%A0", + "attachment; filename=\"meh.exe\xC2\xA0\"", + "", + "", + "binary/octet-stream", + L"", + L"meh.exe-" + }, +#endif // OS_WIN + { + __LINE__, + "http://www.goodguy.com/utils.js", + "filename=utils.js", + "", + "", + "application/x-javascript", + L"download", + L"utils.js" + }, + { + __LINE__, + "http://www.goodguy.com/contacts.js", + "filename=contacts.js", + "", + "", + "application/json", + L"download", + L"contacts.js" + }, + { + __LINE__, + "http://www.goodguy.com/utils.js", + "filename=utils.js", + "", + "", + "text/javascript", + L"download", + L"utils.js" + }, + { + __LINE__, + "http://www.goodguy.com/utils.js", + "filename=utils.js", + "", + "", + "text/javascript;version=2", + L"download", + L"utils.js" + }, + { + __LINE__, + "http://www.goodguy.com/utils.js", + "filename=utils.js", + "", + "", + "application/ecmascript", + L"download", + L"utils.js" + }, + { + __LINE__, + "http://www.goodguy.com/utils.js", + "filename=utils.js", + "", + "", + "application/ecmascript;version=4", + L"download", + L"utils.js" + }, + { + __LINE__, + "http://www.goodguy.com/program.exe", + "filename=program.exe", + "", + "", + "application/foo-bar", + L"download", + L"program.exe" + }, + { + __LINE__, + "http://www.evil.com/../foo.txt", + "filename=../foo.txt", + "", + "", + "text/plain", + L"download", + L"-foo.txt" + }, + { + __LINE__, + "http://www.evil.com/..\\foo.txt", + "filename=..\\foo.txt", + "", + "", + "text/plain", + L"download", + L"-foo.txt" + }, + { + __LINE__, + "http://www.evil.com/.hidden", + "filename=.hidden", + "", + "", + "text/plain", + L"download", + L"hidden" TXT_EXT + }, + { + __LINE__, + "http://www.evil.com/trailing.", + "filename=trailing.", + "", + "", + "dance/party", + L"download", +#if defined(OS_WIN) + L"trailing-" +#else + L"trailing" +#endif + }, + { + __LINE__, + "http://www.evil.com/trailing.", + "filename=trailing.", + "", + "", + "text/plain", + L"download", +#if defined(OS_WIN) + L"trailing-" TXT_EXT +#else + L"trailing" TXT_EXT +#endif + }, + { + __LINE__, + "http://www.evil.com/.", + "filename=.", + "", + "", + "dance/party", + L"download", + L"download" + }, + { + __LINE__, + "http://www.evil.com/..", + "filename=..", + "", + "", + "dance/party", + L"download", + L"download" + }, + { + __LINE__, + "http://www.evil.com/...", + "filename=...", + "", + "", + "dance/party", + L"download", + L"download" + }, + { // Note that this one doesn't have "filename=" on it. + __LINE__, + "http://www.evil.com/", + "a_file_name.txt", + "", + "", + "image/jpeg", + L"download", + L"download" JPEG_EXT + }, + { + __LINE__, + "http://www.evil.com/", + "filename=", + "", + "", + "image/jpeg", + L"download", + L"download" JPEG_EXT + }, + { + __LINE__, + "http://www.example.com/simple", + "filename=simple", + "", + "", + "application/octet-stream", + L"download", + L"simple" + }, + // Reserved words on Windows + { + __LINE__, + "http://www.goodguy.com/COM1", + "filename=COM1", + "", + "", + "application/foo-bar", + L"download", +#if defined(OS_WIN) + L"_COM1" +#else + L"COM1" +#endif + }, + { + __LINE__, + "http://www.goodguy.com/COM4.txt", + "filename=COM4.txt", + "", + "", + "text/plain", + L"download", +#if defined(OS_WIN) + L"_COM4.txt" +#else + L"COM4.txt" +#endif + }, + { + __LINE__, + "http://www.goodguy.com/lpt1.TXT", + "filename=lpt1.TXT", + "", + "", + "text/plain", + L"download", +#if defined(OS_WIN) + L"_lpt1.TXT" +#else + L"lpt1.TXT" +#endif + }, + { + __LINE__, + "http://www.goodguy.com/clock$.txt", + "filename=clock$.txt", + "", + "", + "text/plain", + L"download", +#if defined(OS_WIN) + L"_clock$.txt" +#else + L"clock$.txt" +#endif + }, + { // Validation should also apply to sugested name + __LINE__, + "http://www.goodguy.com/blah$.txt", + "filename=clock$.txt", + "", + "clock$.txt", + "text/plain", + L"download", +#if defined(OS_WIN) + L"_clock$.txt" +#else + L"clock$.txt" +#endif + }, + { + __LINE__, + "http://www.goodguy.com/mycom1.foo", + "filename=mycom1.foo", + "", + "", + "text/plain", + L"download", + L"mycom1.foo" + }, + { + __LINE__, + "http://www.badguy.com/Setup.exe.local", + "filename=Setup.exe.local", + "", + "", + "application/foo-bar", + L"download", +#if defined(OS_WIN) + L"Setup.exe.download" +#else + L"Setup.exe.local" +#endif + }, + { + __LINE__, + "http://www.badguy.com/Setup.exe.local", + "filename=Setup.exe.local.local", + "", + "", + "application/foo-bar", + L"download", +#if defined(OS_WIN) + L"Setup.exe.local.download" +#else + L"Setup.exe.local.local" +#endif + }, + { + __LINE__, + "http://www.badguy.com/Setup.exe.lnk", + "filename=Setup.exe.lnk", + "", + "", + "application/foo-bar", + L"download", +#if defined(OS_WIN) + L"Setup.exe.download" +#else + L"Setup.exe.lnk" +#endif + }, + { + __LINE__, + "http://www.badguy.com/Desktop.ini", + "filename=Desktop.ini", + "", + "", + "application/foo-bar", + L"download", +#if defined(OS_WIN) + L"_Desktop.ini" +#else + L"Desktop.ini" +#endif + }, + { + __LINE__, + "http://www.badguy.com/Thumbs.db", + "filename=Thumbs.db", + "", + "", + "application/foo-bar", + L"download", +#if defined(OS_WIN) + L"_Thumbs.db" +#else + L"Thumbs.db" +#endif + }, + { + __LINE__, + "http://www.hotmail.com", + "filename=source.jpg", + "", + "", + "application/x-javascript", + L"download", + L"source.jpg" + }, + { // http://crbug.com/5772. + __LINE__, + "http://www.example.com/foo.tar.gz", + "", + "", + "", + "application/x-tar", + L"download", + L"foo.tar.gz" + }, + { // http://crbug.com/52250. + __LINE__, + "http://www.example.com/foo.tgz", + "", + "", + "", + "application/x-tar", + L"download", + L"foo.tgz" + }, + { // http://crbug.com/7337. + __LINE__, + "http://maged.lordaeron.org/blank.reg", + "", + "", + "", + "text/x-registry", + L"download", + L"blank.reg" + }, + { + __LINE__, + "http://www.example.com/bar.tar", + "", + "", + "", + "application/x-tar", + L"download", + L"bar.tar" + }, + { + __LINE__, + "http://www.example.com/bar.bogus", + "", + "", + "", + "application/x-tar", + L"download", + L"bar.bogus" + }, + { // http://crbug.com/20337 + __LINE__, + "http://www.example.com/.download.txt", + "filename=.download.txt", + "", + "", + "text/plain", + L"-download", + L"download.txt" + }, + { // http://crbug.com/56855. + __LINE__, + "http://www.example.com/bar.sh", + "", + "", + "", + "application/x-sh", + L"download", + L"bar.sh" + }, + { // http://crbug.com/61571 + __LINE__, + "http://www.example.com/npdf.php?fn=foobar.pdf", + "", + "", + "", + "text/plain", + L"download", + L"npdf" TXT_EXT + }, + { // Shouldn't overwrite C-D specified extension. + __LINE__, + "http://www.example.com/npdf.php?fn=foobar.pdf", + "filename=foobar.jpg", + "", + "", + "text/plain", + L"download", + L"foobar.jpg" + }, + { // http://crbug.com/87719 + __LINE__, + "http://www.example.com/image.aspx?id=blargh", + "", + "", + "", + "image/jpeg", + L"download", + L"image" JPEG_EXT + }, +#if defined(OS_CHROMEOS) + { // http://crosbug.com/26028 + __LINE__, + "http://www.example.com/fooa%cc%88.txt", + "", + "", + "", + "image/jpeg", + L"foo\xe4", + L"foo\xe4.txt" + }, +#endif + }; + + for (size_t i = 0; i < ARRAYSIZE_UNSAFE(selection_tests); ++i) + RunGenerateFileNameTestCase(&selection_tests[i]); + + for (size_t i = 0; i < ARRAYSIZE_UNSAFE(generation_tests); ++i) + RunGenerateFileNameTestCase(&generation_tests[i]); + + for (size_t i = 0; i < ARRAYSIZE_UNSAFE(generation_tests); ++i) { + GenerateFilenameCase test_case = generation_tests[i]; + test_case.referrer_charset = "GBK"; + RunGenerateFileNameTestCase(&test_case); + } +} + // This is currently a windows specific function. #if defined(OS_WIN) namespace { @@ -1812,6 +3368,88 @@ TEST(NetUtilTest, GetNetworkList) { } } +static const base::FilePath::CharType* kSafePortableBasenames[] = { + FILE_PATH_LITERAL("a"), + FILE_PATH_LITERAL("a.txt"), + FILE_PATH_LITERAL("a b.txt"), + FILE_PATH_LITERAL("a-b.txt"), + FILE_PATH_LITERAL("My Computer"), + FILE_PATH_LITERAL(" Computer"), +}; + +static const base::FilePath::CharType* kUnsafePortableBasenames[] = { + FILE_PATH_LITERAL(""), + FILE_PATH_LITERAL("."), + FILE_PATH_LITERAL(".."), + FILE_PATH_LITERAL("..."), + FILE_PATH_LITERAL("con"), + FILE_PATH_LITERAL("con.zip"), + FILE_PATH_LITERAL("NUL"), + FILE_PATH_LITERAL("NUL.zip"), + FILE_PATH_LITERAL(".a"), + FILE_PATH_LITERAL("a."), + FILE_PATH_LITERAL("a\"a"), + FILE_PATH_LITERAL("a<a"), + FILE_PATH_LITERAL("a>a"), + FILE_PATH_LITERAL("a?a"), + FILE_PATH_LITERAL("a/"), + FILE_PATH_LITERAL("a\\"), + FILE_PATH_LITERAL("a "), + FILE_PATH_LITERAL("a . ."), + FILE_PATH_LITERAL("My Computer.{a}"), + FILE_PATH_LITERAL("My Computer.{20D04FE0-3AEA-1069-A2D8-08002B30309D}"), +#if !defined(OS_WIN) + FILE_PATH_LITERAL("a\\a"), +#endif +}; + +static const base::FilePath::CharType* kSafePortableRelativePaths[] = { + FILE_PATH_LITERAL("a/a"), +#if defined(OS_WIN) + FILE_PATH_LITERAL("a\\a"), +#endif +}; + +TEST(NetUtilTest, IsSafePortablePathComponent) { + for (size_t i = 0 ; i < arraysize(kSafePortableBasenames); ++i) { + EXPECT_TRUE(IsSafePortablePathComponent(base::FilePath( + kSafePortableBasenames[i]))) << kSafePortableBasenames[i]; + } + for (size_t i = 0 ; i < arraysize(kUnsafePortableBasenames); ++i) { + EXPECT_FALSE(IsSafePortablePathComponent(base::FilePath( + kUnsafePortableBasenames[i]))) << kUnsafePortableBasenames[i]; + } + for (size_t i = 0 ; i < arraysize(kSafePortableRelativePaths); ++i) { + EXPECT_FALSE(IsSafePortablePathComponent(base::FilePath( + kSafePortableRelativePaths[i]))) << kSafePortableRelativePaths[i]; + } +} + +TEST(NetUtilTest, IsSafePortableRelativePath) { + base::FilePath safe_dirname(FILE_PATH_LITERAL("a")); + for (size_t i = 0 ; i < arraysize(kSafePortableBasenames); ++i) { + EXPECT_TRUE(IsSafePortableRelativePath(base::FilePath( + kSafePortableBasenames[i]))) << kSafePortableBasenames[i]; + EXPECT_TRUE(IsSafePortableRelativePath(safe_dirname.Append(base::FilePath( + kSafePortableBasenames[i])))) << kSafePortableBasenames[i]; + } + for (size_t i = 0 ; i < arraysize(kSafePortableRelativePaths); ++i) { + EXPECT_TRUE(IsSafePortableRelativePath(base::FilePath( + kSafePortableRelativePaths[i]))) << kSafePortableRelativePaths[i]; + EXPECT_TRUE(IsSafePortableRelativePath(safe_dirname.Append(base::FilePath( + kSafePortableRelativePaths[i])))) << kSafePortableRelativePaths[i]; + } + for (size_t i = 0 ; i < arraysize(kUnsafePortableBasenames); ++i) { + EXPECT_FALSE(IsSafePortableRelativePath(base::FilePath( + kUnsafePortableBasenames[i]))) << kUnsafePortableBasenames[i]; + if (!base::FilePath::StringType(kUnsafePortableBasenames[i]).empty()) { + EXPECT_FALSE(IsSafePortableRelativePath(safe_dirname.Append( + base::FilePath(kUnsafePortableBasenames[i])))) + << kUnsafePortableBasenames[i]; + } + } +} + struct NonUniqueNameTestData { bool is_unique; const char* hostname; diff --git a/net/base/net_util_win.cc b/net/base/net_util_win.cc index c4cedee4..14f3170d 100644 --- a/net/base/net_util_win.cc +++ b/net/base/net_util_win.cc @@ -85,6 +85,59 @@ struct WlanApi { namespace net { +bool FileURLToFilePath(const GURL& url, base::FilePath* file_path) { + *file_path = base::FilePath(); + std::wstring& file_path_str = const_cast<std::wstring&>(file_path->value()); + file_path_str.clear(); + + if (!url.is_valid()) + return false; + + std::string path; + std::string host = url.host(); + if (host.empty()) { + // URL contains no host, the path is the filename. In this case, the path + // will probably be preceeded with a slash, as in "/C:/foo.txt", so we + // trim out that here. + path = url.path(); + size_t first_non_slash = path.find_first_not_of("/\\"); + if (first_non_slash != std::string::npos && first_non_slash > 0) + path.erase(0, first_non_slash); + } else { + // URL contains a host: this means it's UNC. We keep the preceeding slash + // on the path. + path = "\\\\"; + path.append(host); + path.append(url.path()); + } + + if (path.empty()) + return false; + std::replace(path.begin(), path.end(), '/', '\\'); + + // GURL stores strings as percent-encoded UTF-8, this will undo if possible. + path = UnescapeURLComponent(path, + UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS); + + if (!IsStringUTF8(path)) { + // Not UTF-8, assume encoding is native codepage and we're done. We know we + // are giving the conversion function a nonempty string, and it may fail if + // the given string is not in the current encoding and give us an empty + // string back. We detect this and report failure. + file_path_str = base::SysNativeMBToWide(path); + return !file_path_str.empty(); + } + file_path_str.assign(base::UTF8ToWide(path)); + + // We used to try too hard and see if |path| made up entirely of + // the 1st 256 characters in the Unicode was a zero-extended UTF-16. + // If so, we converted it to 'Latin-1' and checked if the result was UTF-8. + // If the check passed, we converted the result to UTF-8. + // Otherwise, we treated the result as the native OS encoding. + // However, that led to http://crbug.com/4619 and http://crbug.com/14153 + return true; +} + bool GetNetworkList(NetworkInterfaceList* networks, int policy) { // GetAdaptersAddresses() may require IO operations. base::ThreadRestrictions::AssertIOAllowed(); diff --git a/net/filter/filter.cc b/net/filter/filter.cc index 497b9dc..90d91e1 100644 --- a/net/filter/filter.cc +++ b/net/filter/filter.cc @@ -6,12 +6,11 @@ #include "base/files/file_path.h" #include "base/strings/string_util.h" -#include "net/base/filename_util.h" #include "net/base/io_buffer.h" #include "net/base/mime_util.h" +#include "net/base/net_util.h" #include "net/filter/gzip_filter.h" #include "net/filter/sdch_filter.h" -#include "url/gurl.h" namespace { diff --git a/net/net.gyp b/net/net.gyp index 63d62e6..074c867 100644 --- a/net/net.gyp +++ b/net/net.gyp @@ -123,16 +123,14 @@ 'base/file_stream_net_log_parameters.cc', 'base/file_stream_net_log_parameters.h', 'base/file_stream_whence.h', - 'base/filename_util.cc', - 'base/filename_util.h', + 'base/int128.cc', + 'base/int128.h', 'base/hash_value.cc', 'base/hash_value.h', 'base/host_mapping_rules.cc', 'base/host_mapping_rules.h', 'base/host_port_pair.cc', 'base/host_port_pair.h', - 'base/int128.cc', - 'base/int128.h', 'base/io_buffer.cc', 'base/io_buffer.h', 'base/iovec.h', @@ -1702,10 +1700,9 @@ 'base/escape_unittest.cc', 'base/expiring_cache_unittest.cc', 'base/file_stream_unittest.cc', - 'base/filename_util_unittest.cc', + 'base/int128_unittest.cc', 'base/host_mapping_rules_unittest.cc', 'base/host_port_pair_unittest.cc', - 'base/int128_unittest.cc', 'base/ip_endpoint_unittest.cc', 'base/ip_pattern_unittest.cc', 'base/keygen_handler_unittest.cc', @@ -1735,17 +1732,17 @@ 'cert/jwk_serializer_unittest.cc', 'cert/multi_log_ct_verifier_unittest.cc', 'cert/multi_threaded_cert_verifier_unittest.cc', - 'cert/nss_cert_database_chromeos_unittest.cc', 'cert/nss_cert_database_unittest.cc', + 'cert/nss_cert_database_chromeos_unittest.cc', 'cert/nss_profile_filter_chromeos_unittest.cc', 'cert/pem_tokenizer_unittest.cc', 'cert/signed_certificate_timestamp_unittest.cc', 'cert/test_root_certs_unittest.cc', - 'cert/x509_cert_types_unittest.cc', 'cert/x509_certificate_unittest.cc', + 'cert/x509_cert_types_unittest.cc', + 'cert/x509_util_unittest.cc', 'cert/x509_util_nss_unittest.cc', 'cert/x509_util_openssl_unittest.cc', - 'cert/x509_util_unittest.cc', 'cookies/canonical_cookie_unittest.cc', 'cookies/cookie_constants_unittest.cc', 'cookies/cookie_monster_unittest.cc', @@ -1784,9 +1781,9 @@ 'dns/mapped_host_resolver_unittest.cc', 'dns/mdns_cache_unittest.cc', 'dns/mdns_client_unittest.cc', + 'dns/serial_worker_unittest.cc', 'dns/record_parsed_unittest.cc', 'dns/record_rdata_unittest.cc', - 'dns/serial_worker_unittest.cc', 'dns/single_request_host_resolver_unittest.cc', 'filter/filter_unittest.cc', 'filter/gzip_filter_unittest.cc', diff --git a/net/proxy/proxy_script_fetcher_impl_unittest.cc b/net/proxy/proxy_script_fetcher_impl_unittest.cc index 53f7655..f0505540 100644 --- a/net/proxy/proxy_script_fetcher_impl_unittest.cc +++ b/net/proxy/proxy_script_fetcher_impl_unittest.cc @@ -10,8 +10,8 @@ #include "base/files/file_path.h" #include "base/path_service.h" #include "base/strings/utf_string_conversions.h" -#include "net/base/filename_util.h" #include "net/base/load_flags.h" +#include "net/base/net_util.h" #include "net/base/test_completion_callback.h" #include "net/cert/mock_cert_verifier.h" #include "net/disk_cache/disk_cache.h" diff --git a/net/url_request/file_protocol_handler.cc b/net/url_request/file_protocol_handler.cc index ceed930..ef8096f 100644 --- a/net/url_request/file_protocol_handler.cc +++ b/net/url_request/file_protocol_handler.cc @@ -6,8 +6,8 @@ #include "base/logging.h" #include "base/task_runner.h" -#include "net/base/filename_util.h" #include "net/base/net_errors.h" +#include "net/base/net_util.h" #include "net/url_request/url_request.h" #include "net/url_request/url_request_error_job.h" #include "net/url_request/url_request_file_dir_job.h" diff --git a/net/url_request/url_request_file_job.cc b/net/url_request/url_request_file_job.cc index d244970..7db8d81 100644 --- a/net/url_request/url_request_file_job.cc +++ b/net/url_request/url_request_file_job.cc @@ -29,11 +29,11 @@ #include "base/threading/thread_restrictions.h" #include "build/build_config.h" #include "net/base/file_stream.h" -#include "net/base/filename_util.h" #include "net/base/io_buffer.h" #include "net/base/load_flags.h" #include "net/base/mime_util.h" #include "net/base/net_errors.h" +#include "net/base/net_util.h" #include "net/filter/filter.h" #include "net/http/http_util.h" #include "net/url_request/url_request_error_job.h" diff --git a/net/url_request/url_request_unittest.cc b/net/url_request/url_request_unittest.cc index 7eb607e..01af382 100644 --- a/net/url_request/url_request_unittest.cc +++ b/net/url_request/url_request_unittest.cc @@ -30,7 +30,6 @@ #include "base/strings/stringprintf.h" #include "base/strings/utf_string_conversions.h" #include "net/base/capturing_net_log.h" -#include "net/base/filename_util.h" #include "net/base/load_flags.h" #include "net/base/load_timing_info.h" #include "net/base/load_timing_info_test_util.h" |