diff options
author | sra@chromium.org <sra@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-04-07 21:02:11 +0000 |
---|---|---|
committer | sra@chromium.org <sra@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2009-04-07 21:02:11 +0000 |
commit | 12adfaa77882b5049465b2d32e16d23c1f349e2f (patch) | |
tree | b83321d8af6d9524b6d98e089ffae13439d3faa3 /base | |
parent | 86da65dfde60bfd08a0494e82797d8796fef02fe (diff) | |
download | chromium_src-12adfaa77882b5049465b2d32e16d23c1f349e2f.zip chromium_src-12adfaa77882b5049465b2d32e16d23c1f349e2f.tar.gz chromium_src-12adfaa77882b5049465b2d32e16d23c1f349e2f.tar.bz2 |
Separate out file_util functions that use ICU into their own file.
This prevents ~300K of ICU being linked into chrome.exe due to a false
dependency.
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@13281 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'base')
-rw-r--r-- | base/base.gyp | 1 |
-rw-r--r-- | base/file_util.cc | 73 |
-rw-r--r-- | base/file_util_icu.cc | 90 |
3 files changed, 92 insertions, 72 deletions
diff --git a/base/base.gyp b/base/base.gyp index ee56c96..325ab65 100644 --- a/base/base.gyp +++ b/base/base.gyp @@ -92,6 +92,7 @@ 'file_path.h', 'file_util.cc', 'file_util.h', + 'file_util_icu.cc', 'file_util_linux.cc', 'file_util_mac.mm', 'file_util_posix.cc', diff --git a/base/file_util.cc b/base/file_util.cc index cb6c7e01..9ba6eed 100644 --- a/base/file_util.cc +++ b/base/file_util.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. +// Copyright (c) 2006-2009 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. @@ -14,7 +14,6 @@ #include "base/file_path.h" #include "base/logging.h" #include "base/string_util.h" -#include "unicode/uniset.h" #include "base/string_piece.h" #include "base/sys_string_conversions.h" @@ -144,76 +143,6 @@ void ReplaceExtension(FilePath* path, const FilePath::StringType& extension) { value.append(clean_extension); } -void ReplaceIllegalCharacters(std::wstring* file_name, int replace_char) { - DCHECK(file_name); - - // Control characters, formatting characters, non-characters, and - // some printable ASCII characters regarded as dangerous ('"*/:<>?\\'). - // See http://blogs.msdn.com/michkap/archive/2006/11/03/941420.aspx - // and http://msdn2.microsoft.com/en-us/library/Aa365247.aspx - // TODO(jungshik): Revisit the set. ZWJ and ZWNJ are excluded because they - // are legitimate in Arabic and some S/SE Asian scripts. However, when used - // elsewhere, they can be confusing/problematic. - // Also, consider wrapping the set with our Singleton class to create and - // freeze it only once. Note that there's a trade-off between memory and - // speed. 
- - UErrorCode status = U_ZERO_ERROR; -#if defined(WCHAR_T_IS_UTF16) - UnicodeSet illegal_characters(UnicodeString( - L"[[\"*/:<>?\\\\|][:Cc:][:Cf:] - [\u200c\u200d]]"), status); -#else - UnicodeSet illegal_characters(UNICODE_STRING_SIMPLE( - "[[\"*/:<>?\\\\|][:Cc:][:Cf:] - [\\u200c\\u200d]]").unescape(), status); -#endif - DCHECK(U_SUCCESS(status)); - // Add non-characters. If this becomes a performance bottleneck by - // any chance, check |ucs4 & 0xFFFEu == 0xFFFEu|, instead. - illegal_characters.add(0xFDD0, 0xFDEF); - for (int i = 0; i <= 0x10; ++i) { - int plane_base = 0x10000 * i; - illegal_characters.add(plane_base + 0xFFFE, plane_base + 0xFFFF); - } - illegal_characters.freeze(); - DCHECK(!illegal_characters.contains(replace_char) && replace_char < 0x10000); - - // Remove leading and trailing whitespace. - TrimWhitespace(*file_name, TRIM_ALL, file_name); - - std::wstring::size_type i = 0; - std::wstring::size_type length = file_name->size(); - const wchar_t* wstr = file_name->data(); -#if defined(WCHAR_T_IS_UTF16) - // Using |span| method of UnicodeSet might speed things up a bit, but - // it's not likely to matter here. 
- std::wstring temp; - temp.reserve(length); - while (i < length) { - UChar32 ucs4; - std::wstring::size_type prev = i; - U16_NEXT(wstr, i, length, ucs4); - if (illegal_characters.contains(ucs4)) { - temp.push_back(replace_char); - } else if (ucs4 < 0x10000) { - temp.push_back(ucs4); - } else { - temp.push_back(wstr[prev]); - temp.push_back(wstr[prev + 1]); - } - } - file_name->swap(temp); -#elif defined(WCHAR_T_IS_UTF32) - while (i < length) { - if (illegal_characters.contains(wstr[i])) { - (*file_name)[i] = replace_char; - } - ++i; - } -#else -#error wchar_t* should be either UTF-16 or UTF-32 -#endif -} - bool ContentsEqual(const FilePath& filename1, const FilePath& filename2) { // We open the file in binary format even if they are text files because // we are just comparing that bytes are exactly same in both files and not diff --git a/base/file_util_icu.cc b/base/file_util_icu.cc new file mode 100644 index 0000000..0b9830d --- /dev/null +++ b/base/file_util_icu.cc @@ -0,0 +1,90 @@ +// Copyright (c) 2009 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. + +// File utilities that use the ICU library go in this file. Functions using ICU +// are separated from the other functions to prevent ICU being pulled in by the +// linker if there is a false dependency. +// +// (The VS2005 linker finds such a false dependency and adds ~300K of ICU to +// chrome.exe if this code lives in file_util.cc, even though none of this code +// is called.) + +#include "base/file_util.h" + +#include "base/string_util.h" +#include "unicode/uniset.h" + +namespace file_util { + +void ReplaceIllegalCharacters(std::wstring* file_name, int replace_char) { + DCHECK(file_name); + + // Control characters, formatting characters, non-characters, and + // some printable ASCII characters regarded as dangerous ('"*/:<>?\\'). 
+ // See http://blogs.msdn.com/michkap/archive/2006/11/03/941420.aspx + // and http://msdn2.microsoft.com/en-us/library/Aa365247.aspx + // TODO(jungshik): Revisit the set. ZWJ and ZWNJ are excluded because they + // are legitimate in Arabic and some S/SE Asian scripts. However, when used + // elsewhere, they can be confusing/problematic. + // Also, consider wrapping the set with our Singleton class to create and + // freeze it only once. Note that there's a trade-off between memory and + // speed. + + UErrorCode status = U_ZERO_ERROR; +#if defined(WCHAR_T_IS_UTF16) + UnicodeSet illegal_characters(UnicodeString( + L"[[\"*/:<>?\\\\|][:Cc:][:Cf:] - [\u200c\u200d]]"), status); +#else + UnicodeSet illegal_characters(UNICODE_STRING_SIMPLE( + "[[\"*/:<>?\\\\|][:Cc:][:Cf:] - [\\u200c\\u200d]]").unescape(), status); +#endif + DCHECK(U_SUCCESS(status)); + // Add non-characters. If this becomes a performance bottleneck by + // any chance, check |ucs4 & 0xFFFEu == 0xFFFEu|, instead. + illegal_characters.add(0xFDD0, 0xFDEF); + for (int i = 0; i <= 0x10; ++i) { + int plane_base = 0x10000 * i; + illegal_characters.add(plane_base + 0xFFFE, plane_base + 0xFFFF); + } + illegal_characters.freeze(); + DCHECK(!illegal_characters.contains(replace_char) && replace_char < 0x10000); + + // Remove leading and trailing whitespace. + TrimWhitespace(*file_name, TRIM_ALL, file_name); + + std::wstring::size_type i = 0; + std::wstring::size_type length = file_name->size(); + const wchar_t* wstr = file_name->data(); +#if defined(WCHAR_T_IS_UTF16) + // Using |span| method of UnicodeSet might speed things up a bit, but + // it's not likely to matter here. 
+ std::wstring temp; + temp.reserve(length); + while (i < length) { + UChar32 ucs4; + std::wstring::size_type prev = i; + U16_NEXT(wstr, i, length, ucs4); + if (illegal_characters.contains(ucs4)) { + temp.push_back(replace_char); + } else if (ucs4 < 0x10000) { + temp.push_back(ucs4); + } else { + temp.push_back(wstr[prev]); + temp.push_back(wstr[prev + 1]); + } + } + file_name->swap(temp); +#elif defined(WCHAR_T_IS_UTF32) + while (i < length) { + if (illegal_characters.contains(wstr[i])) { + (*file_name)[i] = replace_char; + } + ++i; + } +#else +#error wchar_t* should be either UTF-16 or UTF-32 +#endif +} + +} // namespace |