summary | refs | log | tree | commit | diff | stats
path: root/base/file_util.cc
diff options
context:
space:
mode:
author sra@chromium.org <sra@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2009-04-07 21:02:11 +0000
committer sra@chromium.org <sra@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2009-04-07 21:02:11 +0000
commit 12adfaa77882b5049465b2d32e16d23c1f349e2f (patch)
tree b83321d8af6d9524b6d98e089ffae13439d3faa3 /base/file_util.cc
parent 86da65dfde60bfd08a0494e82797d8796fef02fe (diff)
download chromium_src-12adfaa77882b5049465b2d32e16d23c1f349e2f.zip
chromium_src-12adfaa77882b5049465b2d32e16d23c1f349e2f.tar.gz
chromium_src-12adfaa77882b5049465b2d32e16d23c1f349e2f.tar.bz2
Separate out file_util functions that use ICU into their own file.
This prevents ~300K of ICU being linked into chrome.exe due to a false dependency.

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@13281 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'base/file_util.cc')
-rw-r--r-- base/file_util.cc 73
1 files changed, 1 insertions, 72 deletions
diff --git a/base/file_util.cc b/base/file_util.cc
index cb6c7e01..9ba6eed 100644
--- a/base/file_util.cc
+++ b/base/file_util.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@@ -14,7 +14,6 @@
#include "base/file_path.h"
#include "base/logging.h"
#include "base/string_util.h"
-#include "unicode/uniset.h"
#include "base/string_piece.h"
#include "base/sys_string_conversions.h"
@@ -144,76 +143,6 @@ void ReplaceExtension(FilePath* path, const FilePath::StringType& extension) {
value.append(clean_extension);
}
-void ReplaceIllegalCharacters(std::wstring* file_name, int replace_char) {
- DCHECK(file_name);
-
- // Control characters, formatting characters, non-characters, and
- // some printable ASCII characters regarded as dangerous ('"*/:<>?\\').
- // See http://blogs.msdn.com/michkap/archive/2006/11/03/941420.aspx
- // and http://msdn2.microsoft.com/en-us/library/Aa365247.aspx
- // TODO(jungshik): Revisit the set. ZWJ and ZWNJ are excluded because they
- // are legitimate in Arabic and some S/SE Asian scripts. However, when used
- // elsewhere, they can be confusing/problematic.
- // Also, consider wrapping the set with our Singleton class to create and
- // freeze it only once. Note that there's a trade-off between memory and
- // speed.
-
- UErrorCode status = U_ZERO_ERROR;
-#if defined(WCHAR_T_IS_UTF16)
- UnicodeSet illegal_characters(UnicodeString(
- L"[[\"*/:<>?\\\\|][:Cc:][:Cf:] - [\u200c\u200d]]"), status);
-#else
- UnicodeSet illegal_characters(UNICODE_STRING_SIMPLE(
- "[[\"*/:<>?\\\\|][:Cc:][:Cf:] - [\\u200c\\u200d]]").unescape(), status);
-#endif
- DCHECK(U_SUCCESS(status));
- // Add non-characters. If this becomes a performance bottleneck by
- // any chance, check |ucs4 & 0xFFFEu == 0xFFFEu|, instead.
- illegal_characters.add(0xFDD0, 0xFDEF);
- for (int i = 0; i <= 0x10; ++i) {
- int plane_base = 0x10000 * i;
- illegal_characters.add(plane_base + 0xFFFE, plane_base + 0xFFFF);
- }
- illegal_characters.freeze();
- DCHECK(!illegal_characters.contains(replace_char) && replace_char < 0x10000);
-
- // Remove leading and trailing whitespace.
- TrimWhitespace(*file_name, TRIM_ALL, file_name);
-
- std::wstring::size_type i = 0;
- std::wstring::size_type length = file_name->size();
- const wchar_t* wstr = file_name->data();
-#if defined(WCHAR_T_IS_UTF16)
- // Using |span| method of UnicodeSet might speed things up a bit, but
- // it's not likely to matter here.
- std::wstring temp;
- temp.reserve(length);
- while (i < length) {
- UChar32 ucs4;
- std::wstring::size_type prev = i;
- U16_NEXT(wstr, i, length, ucs4);
- if (illegal_characters.contains(ucs4)) {
- temp.push_back(replace_char);
- } else if (ucs4 < 0x10000) {
- temp.push_back(ucs4);
- } else {
- temp.push_back(wstr[prev]);
- temp.push_back(wstr[prev + 1]);
- }
- }
- file_name->swap(temp);
-#elif defined(WCHAR_T_IS_UTF32)
- while (i < length) {
- if (illegal_characters.contains(wstr[i])) {
- (*file_name)[i] = replace_char;
- }
- ++i;
- }
-#else
-#error wchar_t* should be either UTF-16 or UTF-32
-#endif
-}
-
bool ContentsEqual(const FilePath& filename1, const FilePath& filename2) {
// We open the file in binary format even if they are text files because
// we are just comparing that bytes are exactly same in both files and not