Clean up recent string conversion function changes, part 1: Remove unnecessary code. Thanks to a change in escape.cc I can basically revert the ICU conversions back to what they used to be; I can also get rid of half the conversions immediately since they aren't used.

This does not split out the "adjust" versions of the UTF conversions into their own header/implementation; that's coming in the next patch.

BUG=4010
TEST=none
Review URL: http://codereview.chromium.org/380007

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@31533 0039d316-1c4b-4281-b951-d872f2087c98
author: pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2009-11-10 01:43:15 +0000
committer: pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2009-11-10 01:43:15 +0000
commit: 2500a0f7c4ac54a55d621069dc044ddc6702a518 (patch)
tree: 960a571bb2b49c9005fd152138e4398df9072854 /base/i18n
parent: 0d435eae98fbe4f8055215a7aa6c483b07b7fa03 (diff)
download: chromium_src-2500a0f7c4ac54a55d621069dc044ddc6702a518.zip
chromium_src-2500a0f7c4ac54a55d621069dc044ddc6702a518.tar.gz
chromium_src-2500a0f7c4ac54a55d621069dc044ddc6702a518.tar.bz2
3 files changed, 24 insertions, 165 deletions
diff --git a/base/i18n/icu_string_conversions.cc b/base/i18n/icu_string_conversions.cc
index c93b103..252eb9c 100644
--- a/base/i18n/icu_string_conversions.cc
+++ b/base/i18n/icu_string_conversions.cc
@@ -157,8 +157,6 @@ const char kCodepageUTF16LE[] = "UTF-16LE";
 
 // Codepage <-> Wide/UTF-16  ---------------------------------------------------
 
-// Convert a UTF-16 string into the specified codepage_name.  If the codepage
-// isn't found, return false.
 bool UTF16ToCodepage(const string16& utf16,
                      const char* codepage_name,
                      OnStringConversionError::Type on_error,
@@ -174,11 +172,10 @@ bool UTF16ToCodepage(const string16& utf16,
                           static_cast<int>(utf16.length()), on_error, encoded);
 }
 
-bool CodepageToUTF16AndAdjustOffset(const std::string& encoded,
-                                    const char* codepage_name,
-                                    OnStringConversionError::Type on_error,
-                                    string16* utf16,
-                                    size_t* offset_for_adjustment) {
+bool CodepageToUTF16(const std::string& encoded,
+                     const char* codepage_name,
+                     OnStringConversionError::Type on_error,
+                     string16* utf16) {
   utf16->clear();
 
   UErrorCode status = U_ZERO_ERROR;
@@ -197,40 +194,9 @@ bool CodepageToUTF16AndAdjustOffset(const std::string& encoded,
   size_t uchar_max_length = encoded.length() + 1;
 
   SetUpErrorHandlerForToUChars(on_error, converter, &status);
-  char16* byte_buffer = WriteInto(utf16, uchar_max_length);
-  int byte_buffer_length = static_cast<int>(uchar_max_length);
-  const char* data = encoded.data();
-  int length = static_cast<int>(encoded.length());
-  int actual_size = 0;
-  if (offset_for_adjustment) {
-    if (*offset_for_adjustment >= encoded.length()) {
-      *offset_for_adjustment = string16::npos;
-    } else if (*offset_for_adjustment != 0) {
-      // Try to adjust the offset by converting the string in two pieces and
-      // using the length of the first piece as the adjusted offset.
-      actual_size += ucnv_toUChars(converter, byte_buffer, byte_buffer_length,
-          data, static_cast<int>(*offset_for_adjustment), &status);
-      if (U_SUCCESS(status)) {
-        // Conversion succeeded, so update the offset and then fall through to
-        // appending the second half of the string.
-        data += *offset_for_adjustment;
-        length -= *offset_for_adjustment;
-        *offset_for_adjustment = actual_size;
-        byte_buffer += actual_size;
-        byte_buffer_length -= actual_size;
-      } else {
-        // The offset may have been in the middle of an encoding sequence; mark
-        // it as having failed to adjust and then try to convert the entire
-        // string.
-        *offset_for_adjustment = string16::npos;
-        actual_size = 0;
-        ucnv_reset(converter);
-        status = U_ZERO_ERROR;
-      }
-    }
-  }
-  actual_size += ucnv_toUChars(converter, byte_buffer, byte_buffer_length, data,
-                               length, &status);
+  int actual_size = ucnv_toUChars(converter, WriteInto(utf16, uchar_max_length),
+      static_cast<int>(uchar_max_length), encoded.data(),
+      static_cast<int>(encoded.length()), &status);
   ucnv_close(converter);
   if (!U_SUCCESS(status)) {
     utf16->clear();  // Make sure the output is empty on error.
@@ -241,8 +207,6 @@ bool CodepageToUTF16AndAdjustOffset(const std::string& encoded,
   return true;
 }
 
-// Convert a wstring into the specified codepage_name.  If the codepage
-// isn't found, return false.
 bool WideToCodepage(const std::wstring& wide,
                     const char* codepage_name,
                     OnStringConversionError::Type on_error,
@@ -272,16 +236,12 @@ bool WideToCodepage(const std::wstring& wide,
 #endif  // defined(WCHAR_T_IS_UTF32)
 }
 
-// Converts a string of the given codepage into wstring.
-// If the codepage isn't found, return false.
-bool CodepageToWideAndAdjustOffset(const std::string& encoded,
-                                   const char* codepage_name,
-                                   OnStringConversionError::Type on_error,
-                                   std::wstring* wide,
-                                   size_t* offset_for_adjustment) {
+bool CodepageToWide(const std::string& encoded,
+                    const char* codepage_name,
+                    OnStringConversionError::Type on_error,
+                    std::wstring* wide) {
 #if defined(WCHAR_T_IS_UTF16)
-  return CodepageToUTF16AndAdjustOffset(encoded, codepage_name, on_error, wide,
-                                        offset_for_adjustment);
+  return CodepageToUTF16(encoded, codepage_name, on_error, wide);
 #elif defined(WCHAR_T_IS_UTF32)
   wide->clear();
 
@@ -297,42 +257,10 @@ bool CodepageToWideAndAdjustOffset(const std::string& encoded,
   size_t wchar_max_length = encoded.length() + 1;
 
   SetUpErrorHandlerForToUChars(on_error, converter, &status);
-  char* byte_buffer =
-      reinterpret_cast<char*>(WriteInto(wide, wchar_max_length));
-  int byte_buffer_length = static_cast<int>(wchar_max_length) * sizeof(wchar_t);
-  const char* data = encoded.data();
-  int length = static_cast<int>(encoded.length());
-  int actual_size = 0;
-  if (offset_for_adjustment) {
-    if (*offset_for_adjustment >= encoded.length()) {
-      *offset_for_adjustment = std::wstring::npos;
-    } else if (*offset_for_adjustment != 0) {
-      // Try to adjust the offset by converting the string in two pieces and
-      // using the length of the first piece as the adjusted offset.
-      actual_size += ucnv_toAlgorithmic(utf32_platform_endian(), converter,
-          byte_buffer, byte_buffer_length, data,
-          static_cast<int>(*offset_for_adjustment), &status);
-      if (U_SUCCESS(status)) {
-        // Conversion succeeded, so update the offset and then fall through to
-        // appending the second half of the string.
-        data += *offset_for_adjustment;
-        length -= *offset_for_adjustment;
-        *offset_for_adjustment = actual_size / sizeof(wchar_t);
-        byte_buffer += actual_size;
-        byte_buffer_length -= actual_size;
-      } else {
-        // The offset may have been in the middle of an encoding sequence; mark
-        // it as having failed to adjust and then try to convert the entire
-        // string.
-        *offset_for_adjustment = std::wstring::npos;
-        actual_size = 0;
-        ucnv_reset(converter);
-        status = U_ZERO_ERROR;
-      }
-    }
-  }
-  actual_size += ucnv_toAlgorithmic(utf32_platform_endian(), converter,
-      byte_buffer, byte_buffer_length, data, length, &status);
+  int actual_size = ucnv_toAlgorithmic(utf32_platform_endian(), converter,
+      reinterpret_cast<char*>(WriteInto(wide, wchar_max_length)),
+      static_cast<int>(wchar_max_length) * sizeof(wchar_t), encoded.data(),
+      static_cast<int>(encoded.length()), &status);
   ucnv_close(converter);
   if (!U_SUCCESS(status)) {
     wide->clear();  // Make sure the output is empty on error.
diff --git a/base/i18n/icu_string_conversions.h b/base/i18n/icu_string_conversions.h
index 6f2cab7..7b0c77e 100644
--- a/base/i18n/icu_string_conversions.h
+++ b/base/i18n/icu_string_conversions.h
@@ -40,17 +40,6 @@ extern const char kCodepageUTF8[];
 extern const char kCodepageUTF16BE[];
 extern const char kCodepageUTF16LE[];
 
-// Like CodepageToUTF16() (see below), but also takes an offset into |encoded|,
-// which will be adjusted to point at the same logical place in |utf16|.  If
-// this isn't possible because it points past the end of |encoded| or into the
-// middle of a multibyte sequence, it will be set to std::string16::npos.
-// |offset_for_adjustment| may be NULL.
-bool CodepageToUTF16AndAdjustOffset(const std::string& encoded,
-                                    const char* codepage_name,
-                                    OnStringConversionError::Type on_error,
-                                    string16* utf16,
-                                    size_t* offset_for_adjustment);
-
 // Converts between UTF-16 strings and the encoding specified.  If the
 // encoding doesn't exist or the encoding fails (when on_error is FAIL),
 // returns false.
@@ -58,24 +47,10 @@ bool UTF16ToCodepage(const string16& utf16,
                      const char* codepage_name,
                      OnStringConversionError::Type on_error,
                      std::string* encoded);
-inline bool CodepageToUTF16(const std::string& encoded,
-                            const char* codepage_name,
-                            OnStringConversionError::Type on_error,
-                            string16* utf16) {
-  return CodepageToUTF16AndAdjustOffset(encoded, codepage_name, on_error, utf16,
-                                        NULL);
-}
-
-// Like CodepageToWide() (see below), but also takes an offset into |encoded|,
-// which will be adjusted to point at the same logical place in |wide|.  If
-// this isn't possible because it points past the end of |encoded| or into the
-// middle of a multibyte sequence, it will be set to std::wstring::npos.
-// |offset_for_adjustment| may be NULL.
-bool CodepageToWideAndAdjustOffset(const std::string& encoded,
-                                   const char* codepage_name,
-                                   OnStringConversionError::Type on_error,
-                                   std::wstring* wide,
-                                   size_t* offset_for_adjustment);
+bool CodepageToUTF16(const std::string& encoded,
+                     const char* codepage_name,
+                     OnStringConversionError::Type on_error,
+                     string16* utf16);
 
 // Converts between wide strings and the encoding specified.  If the
 // encoding doesn't exist or the encoding fails (when on_error is FAIL),
@@ -84,13 +59,10 @@ bool WideToCodepage(const std::wstring& wide,
                     const char* codepage_name,
                     OnStringConversionError::Type on_error,
                     std::string* encoded);
-inline bool CodepageToWide(const std::string& encoded,
-                           const char* codepage_name,
-                           OnStringConversionError::Type on_error,
-                           std::wstring* wide) {
-  return CodepageToWideAndAdjustOffset(encoded, codepage_name, on_error, wide,
-                                       NULL);
-}
+bool CodepageToWide(const std::string& encoded,
+                    const char* codepage_name,
+                    OnStringConversionError::Type on_error,
+                    std::wstring* wide);
 
 }  // namespace base
 
diff --git a/base/i18n/icu_string_conversions_unittest.cc b/base/i18n/icu_string_conversions_unittest.cc
index 0088a03..2083fa9 100644
--- a/base/i18n/icu_string_conversions_unittest.cc
+++ b/base/i18n/icu_string_conversions_unittest.cc
@@ -325,45 +325,4 @@ TEST(ICUStringConversionsTest, ConvertBetweenCodepageAndUTF16) {
   }
 }
 
-static const struct {
-  const char* codepage_name;
-  const char* encoded;
-  size_t input_offset;
-  size_t u16_output_offset;
-  size_t wide_output_offset;
-} kAdjustOffsetCases[] = {
-  {"gb2312", "", 0, string16::npos, std::wstring::npos},
-  {"gb2312", "\xC4\xE3\xBA\xC3", 0, 0, 0},
-  {"gb2312", "\xC4\xE3\xBA\xC3", 2, 1, 1},
-  {"gb2312", "\xC4\xE3\xBA\xC3", 4, string16::npos, std::wstring::npos},
-  {"gb2312", "\xC4\xE3\xBA\xC3", 1, string16::npos, std::wstring::npos},
-  {"gb2312", "\xC4\xE3\xBA\xC3", std::string::npos, string16::npos,
-   std::wstring::npos},
-  {"gb18030", "\x95\x32\x82\x36\xD2\xBB", 2, string16::npos,
-   std::wstring::npos},
-  {"gb18030", "\x95\x32\x82\x36\xD2\xBB", 4, 2, 1},
-};
-
-TEST(ICUStringConversionsTest, AdjustOffset) {
-  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kAdjustOffsetCases); ++i) {
-    string16 utf16;
-    size_t offset = kAdjustOffsetCases[i].input_offset;
-    EXPECT_TRUE(CodepageToUTF16AndAdjustOffset(kAdjustOffsetCases[i].encoded,
-        kAdjustOffsetCases[i].codepage_name,
-        OnStringConversionError::FAIL, &utf16, &offset));
-    EXPECT_EQ(kAdjustOffsetCases[i].u16_output_offset, offset);
-
-    std::wstring wide;
-    offset = kAdjustOffsetCases[i].input_offset;
-    CodepageToWideAndAdjustOffset(kAdjustOffsetCases[i].encoded,
-        kAdjustOffsetCases[i].codepage_name,
-        OnStringConversionError::FAIL, &wide, &offset);
-#if defined(WCHAR_T_IS_UTF16)
-    EXPECT_EQ(kAdjustOffsetCases[i].u16_output_offset, offset);
-#elif defined(WCHAR_T_IS_UTF32)
-    EXPECT_EQ(kAdjustOffsetCases[i].wide_output_offset, offset);
-#endif
-  }
-}
-
 }  // namespace base
author	pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2009-11-10 01:43:15 +0000
committer	pkasting@chromium.org <pkasting@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2009-11-10 01:43:15 +0000
commit	2500a0f7c4ac54a55d621069dc044ddc6702a518 (patch)
tree	960a571bb2b49c9005fd152138e4398df9072854 /base/i18n
parent	0d435eae98fbe4f8055215a7aa6c483b07b7fa03 (diff)
download	chromium_src-2500a0f7c4ac54a55d621069dc044ddc6702a518.zip chromium_src-2500a0f7c4ac54a55d621069dc044ddc6702a518.tar.gz chromium_src-2500a0f7c4ac54a55d621069dc044ddc6702a518.tar.bz2