1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
|
// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "net/base/net_util.h"
#include "base/string_piece.h"
#include "base/string_util.h"
#include "base/sys_string_conversions.h"
#include "googleurl/src/gurl.h"
#include "net/base/escape.h"
namespace net {
// Converts |url| into a backslash-separated path stored in |*file_path|.
//
// |*file_path| is cleared up front, so it is empty whenever the function
// bails out early. Returns false when |url| is invalid, when no path can be
// extracted from it, or when a narrow-to-wide conversion yields an empty
// string; returns true otherwise.
bool FileURLToFilePath(const GURL& url, std::wstring* file_path) {
  file_path->clear();

  if (!url.is_valid()) {
    return false;
  }

  const std::string url_host = url.host();
  std::string raw_path;
  if (!url_host.empty()) {
    // A host component means this is a UNC location: build "\\host" and keep
    // the URL path, including its leading slash, right after it.
    raw_path = "\\\\";
    raw_path.append(url_host);
    raw_path.append(url.path());
  } else {
    // No host, so the URL path alone names the file. It usually carries a
    // leading slash (e.g. "/C:/foo.txt"); strip any leading run of slashes or
    // backslashes. A path made up only of separators is left untouched.
    raw_path = url.path();
    const size_t first_kept = raw_path.find_first_not_of("/\\");
    if (first_kept != std::string::npos && first_kept > 0) {
      raw_path.erase(0, first_kept);
    }
  }

  if (raw_path.empty()) {
    return false;
  }

  // Switch to backslash separators before unescaping.
  std::replace(raw_path.begin(), raw_path.end(), '/', '\\');

  // GURL keeps its components as percent-encoded UTF-8; undo the escaping
  // where possible.
  const std::string unescaped = UnescapeURLComponent(
      raw_path, UnescapeRule::SPACES | UnescapeRule::URL_SPECIAL_CHARS);

  if (!IsStringUTF8(unescaped)) {
    // Not UTF-8: treat the bytes as the native codepage and finish here. The
    // input is known to be non-empty, so an empty result signals that the
    // conversion failed.
    *file_path = base::SysNativeMBToWide(unescaped);
    return !file_path->empty();
  }

  file_path->assign(UTF8ToWide(unescaped));

  // The unescaped name decoded as UTF-8, but its real encoding is still
  // uncertain: every resulting character might itself be one byte of a UTF-8
  // sequence. If something does not fit in 8 bits, accept the wide string.
  const bool all_chars_8bit =
      !file_path->empty() && IsString8Bit(*file_path);
  if (!all_chars_8bit) {
    return true;
  }

  // Squeeze the wide string back into bytes so it can be re-examined.
  std::string byte_form;
  if (!WideToLatin1(*file_path, &byte_form)) {
    NOTREACHED() << "Should have filtered out non-8-bit strings above.";
    return false;
  }

  // If the bytes look like UTF-8, decode them as such; otherwise assume they
  // are in the native codepage.
  *file_path = IsStringUTF8(byte_form) ? UTF8ToWide(byte_form)
                                       : base::SysNativeMBToWide(byte_form);

  // An empty result here means the 8-bit -> wide conversion failed (empty
  // inputs were already ruled out above).
  return !file_path->empty();
}
} // namespace net
|