summaryrefslogtreecommitdiffstats
path: root/base
diff options
context:
space:
mode:
author sra@chromium.org <sra@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2009-04-07 21:02:11 +0000
committer sra@chromium.org <sra@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2009-04-07 21:02:11 +0000
commit 12adfaa77882b5049465b2d32e16d23c1f349e2f (patch)
tree b83321d8af6d9524b6d98e089ffae13439d3faa3 /base
parent 86da65dfde60bfd08a0494e82797d8796fef02fe (diff)
download chromium_src-12adfaa77882b5049465b2d32e16d23c1f349e2f.zip
chromium_src-12adfaa77882b5049465b2d32e16d23c1f349e2f.tar.gz
chromium_src-12adfaa77882b5049465b2d32e16d23c1f349e2f.tar.bz2
Separate out file_util functions that use ICU into their own file.

This prevents ~300K of ICU from being linked into chrome.exe due to a false dependency.

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@13281 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'base')
-rw-r--r-- base/base.gyp 1
-rw-r--r-- base/file_util.cc 73
-rw-r--r-- base/file_util_icu.cc 90
3 files changed, 92 insertions, 72 deletions
diff --git a/base/base.gyp b/base/base.gyp
index ee56c96..325ab65 100644
--- a/base/base.gyp
+++ b/base/base.gyp
@@ -92,6 +92,7 @@
'file_path.h',
'file_util.cc',
'file_util.h',
+ 'file_util_icu.cc',
'file_util_linux.cc',
'file_util_mac.mm',
'file_util_posix.cc',
diff --git a/base/file_util.cc b/base/file_util.cc
index cb6c7e01..9ba6eed 100644
--- a/base/file_util.cc
+++ b/base/file_util.cc
@@ -1,4 +1,4 @@
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Copyright (c) 2006-2009 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
@@ -14,7 +14,6 @@
#include "base/file_path.h"
#include "base/logging.h"
#include "base/string_util.h"
-#include "unicode/uniset.h"
#include "base/string_piece.h"
#include "base/sys_string_conversions.h"
@@ -144,76 +143,6 @@ void ReplaceExtension(FilePath* path, const FilePath::StringType& extension) {
value.append(clean_extension);
}
-void ReplaceIllegalCharacters(std::wstring* file_name, int replace_char) {
- DCHECK(file_name);
-
- // Control characters, formatting characters, non-characters, and
- // some printable ASCII characters regarded as dangerous ('"*/:<>?\\').
- // See http://blogs.msdn.com/michkap/archive/2006/11/03/941420.aspx
- // and http://msdn2.microsoft.com/en-us/library/Aa365247.aspx
- // TODO(jungshik): Revisit the set. ZWJ and ZWNJ are excluded because they
- // are legitimate in Arabic and some S/SE Asian scripts. However, when used
- // elsewhere, they can be confusing/problematic.
- // Also, consider wrapping the set with our Singleton class to create and
- // freeze it only once. Note that there's a trade-off between memory and
- // speed.
-
- UErrorCode status = U_ZERO_ERROR;
-#if defined(WCHAR_T_IS_UTF16)
- UnicodeSet illegal_characters(UnicodeString(
- L"[[\"*/:<>?\\\\|][:Cc:][:Cf:] - [\u200c\u200d]]"), status);
-#else
- UnicodeSet illegal_characters(UNICODE_STRING_SIMPLE(
- "[[\"*/:<>?\\\\|][:Cc:][:Cf:] - [\\u200c\\u200d]]").unescape(), status);
-#endif
- DCHECK(U_SUCCESS(status));
- // Add non-characters. If this becomes a performance bottleneck by
- // any chance, check |ucs4 & 0xFFFEu == 0xFFFEu|, instead.
- illegal_characters.add(0xFDD0, 0xFDEF);
- for (int i = 0; i <= 0x10; ++i) {
- int plane_base = 0x10000 * i;
- illegal_characters.add(plane_base + 0xFFFE, plane_base + 0xFFFF);
- }
- illegal_characters.freeze();
- DCHECK(!illegal_characters.contains(replace_char) && replace_char < 0x10000);
-
- // Remove leading and trailing whitespace.
- TrimWhitespace(*file_name, TRIM_ALL, file_name);
-
- std::wstring::size_type i = 0;
- std::wstring::size_type length = file_name->size();
- const wchar_t* wstr = file_name->data();
-#if defined(WCHAR_T_IS_UTF16)
- // Using |span| method of UnicodeSet might speed things up a bit, but
- // it's not likely to matter here.
- std::wstring temp;
- temp.reserve(length);
- while (i < length) {
- UChar32 ucs4;
- std::wstring::size_type prev = i;
- U16_NEXT(wstr, i, length, ucs4);
- if (illegal_characters.contains(ucs4)) {
- temp.push_back(replace_char);
- } else if (ucs4 < 0x10000) {
- temp.push_back(ucs4);
- } else {
- temp.push_back(wstr[prev]);
- temp.push_back(wstr[prev + 1]);
- }
- }
- file_name->swap(temp);
-#elif defined(WCHAR_T_IS_UTF32)
- while (i < length) {
- if (illegal_characters.contains(wstr[i])) {
- (*file_name)[i] = replace_char;
- }
- ++i;
- }
-#else
-#error wchar_t* should be either UTF-16 or UTF-32
-#endif
-}
-
bool ContentsEqual(const FilePath& filename1, const FilePath& filename2) {
// We open the file in binary format even if they are text files because
// we are just comparing that bytes are exactly same in both files and not
diff --git a/base/file_util_icu.cc b/base/file_util_icu.cc
new file mode 100644
index 0000000..0b9830d
--- /dev/null
+++ b/base/file_util_icu.cc
@@ -0,0 +1,90 @@
+// Copyright (c) 2009 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+// File utilities that use the ICU library go in this file. Functions using ICU
+// are separated from the other functions to prevent ICU being pulled in by the
+// linker if there is a false dependency.
+//
+// (The VS2005 linker finds such a false dependency and adds ~300K of ICU to
+// chrome.exe if this code lives in file_util.cc, even though none of this code
+// is called.)
+
+#include "base/file_util.h"
+
+#include "base/string_util.h"
+#include "unicode/uniset.h"
+
+namespace file_util {
+
+void ReplaceIllegalCharacters(std::wstring* file_name, int replace_char) {
+ DCHECK(file_name);
+
+ // Trims leading/trailing whitespace from |*file_name|, then replaces every
+ // code point in the illegal set built below with |replace_char|. The set is:
+ // control characters, formatting characters, non-characters, and some
+ // printable ASCII characters regarded as dangerous ('"*/:<>?\\|').
+ // See http://blogs.msdn.com/michkap/archive/2006/11/03/941420.aspx
+ // and http://msdn2.microsoft.com/en-us/library/Aa365247.aspx
+ // TODO(jungshik): Revisit the set. ZWJ and ZWNJ are excluded because they
+ // are legitimate in Arabic and some S/SE Asian scripts, but elsewhere they
+ // can be confusing/problematic. Also, consider wrapping the set with our
+ // Singleton class to create and freeze it only once (memory/speed trade-off).
+
+ UErrorCode status = U_ZERO_ERROR;
+#if defined(WCHAR_T_IS_UTF16)
+ UnicodeSet illegal_characters(UnicodeString(
+ L"[[\"*/:<>?\\\\|][:Cc:][:Cf:] - [\u200c\u200d]]"), status);
+#else
+ UnicodeSet illegal_characters(UNICODE_STRING_SIMPLE(
+ "[[\"*/:<>?\\\\|][:Cc:][:Cf:] - [\\u200c\\u200d]]").unescape(), status);
+#endif
+ DCHECK(U_SUCCESS(status));
+ // Add non-characters (U+FDD0..U+FDEF and the last two code points of every
+ // plane). If this is ever a bottleneck, test |(ucs4 & 0xFFFEu) == 0xFFFEu|.
+ illegal_characters.add(0xFDD0, 0xFDEF);
+ for (int i = 0; i <= 0x10; ++i) {
+ int plane_base = 0x10000 * i;
+ illegal_characters.add(plane_base + 0xFFFE, plane_base + 0xFFFF);
+ }
+ illegal_characters.freeze();
+ DCHECK(!illegal_characters.contains(replace_char) && replace_char < 0x10000);
+
+ // Remove leading and trailing whitespace.
+ TrimWhitespace(*file_name, TRIM_ALL, file_name);
+
+ std::wstring::size_type i = 0;
+ std::wstring::size_type length = file_name->size();
+ const wchar_t* wstr = file_name->data();
+#if defined(WCHAR_T_IS_UTF16)
+ // The result is built in |temp| and swapped in at the end. Using the |span|
+ // method of UnicodeSet might be a bit faster, but is unlikely to matter.
+ std::wstring temp;
+ temp.reserve(length);
+ while (i < length) {
+ UChar32 ucs4;
+ std::wstring::size_type prev = i;  // Start of the current code point.
+ U16_NEXT(wstr, i, length, ucs4);  // Decodes one code point, advancing |i|.
+ if (illegal_characters.contains(ucs4)) {
+ temp.push_back(replace_char);  // One replacement per code point.
+ } else if (ucs4 < 0x10000) {
+ temp.push_back(ucs4);
+ } else {
+ temp.push_back(wstr[prev]);  // Supplementary: copy both code units.
+ temp.push_back(wstr[prev + 1]);
+ }
+ }
+ file_name->swap(temp);
+#elif defined(WCHAR_T_IS_UTF32)
+ while (i < length) {
+ if (illegal_characters.contains(wstr[i])) {
+ (*file_name)[i] = replace_char;  // Overwritten in place, element by element.
+ }
+ ++i;
+ }
+#else
+#error wchar_t* should be either UTF-16 or UTF-32
+#endif
+}
+
+} // namespace file_util