// Copyright (c) 2012 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. #include "ppapi/shared_impl/private/ppb_char_set_shared.h" #include #include "base/i18n/icu_string_conversions.h" #include "ppapi/c/dev/ppb_memory_dev.h" #include "ppapi/thunk/thunk.h" #include "third_party/icu/source/common/unicode/ucnv.h" #include "third_party/icu/source/common/unicode/ucnv_cb.h" #include "third_party/icu/source/common/unicode/ucnv_err.h" #include "third_party/icu/source/common/unicode/ustring.h" namespace ppapi { namespace { PP_CharSet_Trusted_ConversionError DeprecatedToConversionError( PP_CharSet_ConversionError on_error) { switch (on_error) { case PP_CHARSET_CONVERSIONERROR_SKIP: return PP_CHARSET_TRUSTED_CONVERSIONERROR_SKIP; case PP_CHARSET_CONVERSIONERROR_SUBSTITUTE: return PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE; case PP_CHARSET_CONVERSIONERROR_FAIL: default: return PP_CHARSET_TRUSTED_CONVERSIONERROR_FAIL; } } // Converts the given PP error handling behavior to the version in base, // placing the result in |*result| and returning true on success. Returns false // if the enum is invalid. bool PPToBaseConversionError(PP_CharSet_Trusted_ConversionError on_error, base::OnStringConversionError::Type* result) { switch (on_error) { case PP_CHARSET_TRUSTED_CONVERSIONERROR_FAIL: *result = base::OnStringConversionError::FAIL; return true; case PP_CHARSET_TRUSTED_CONVERSIONERROR_SKIP: *result = base::OnStringConversionError::SKIP; return true; case PP_CHARSET_TRUSTED_CONVERSIONERROR_SUBSTITUTE: *result = base::OnStringConversionError::SUBSTITUTE; return true; default: return false; } } } // namespace // static // The "substitution" behavior of this function does not match the // implementation in base, so we partially duplicate the code from // icu_string_conversions.cc with the correct error handling setup required // by the PPAPI interface. char* PPB_CharSet_Shared::UTF16ToCharSetDeprecated( const uint16_t* utf16, uint32_t utf16_len, const char* output_char_set, PP_CharSet_ConversionError deprecated_on_error, uint32_t* output_length) { *output_length = 0; PP_CharSet_Trusted_ConversionError on_error = DeprecatedToConversionError( deprecated_on_error); // Compute required length. uint32_t required_length = 0; UTF16ToCharSet(utf16, utf16_len, output_char_set, on_error, NULL, &required_length); // Our output is null terminated, so need one more byte. char* ret_buf = static_cast( thunk::GetPPB_Memory_Dev_0_1_Thunk()->MemAlloc(required_length + 1)); // Do the conversion into the buffer. PP_Bool result = UTF16ToCharSet(utf16, utf16_len, output_char_set, on_error, ret_buf, &required_length); if (result == PP_FALSE) { thunk::GetPPB_Memory_Dev_0_1_Thunk()->MemFree(ret_buf); return NULL; } ret_buf[required_length] = 0; // Null terminate. *output_length = required_length; return ret_buf; } // static PP_Bool PPB_CharSet_Shared::UTF16ToCharSet( const uint16_t utf16[], uint32_t utf16_len, const char* output_char_set, PP_CharSet_Trusted_ConversionError on_error, char* output_buffer, uint32_t* output_length) { if (!utf16 || !output_char_set || !output_length) { *output_length = 0; return PP_FALSE; } UErrorCode status = U_ZERO_ERROR; UConverter* converter = ucnv_open(output_char_set, &status); if (!U_SUCCESS(status)) { *output_length = 0; return PP_FALSE; } // Setup our error handler. switch (on_error) { case PP_CHARSET_CONVERSIONERROR_FAIL: ucnv_setFromUCallBack(converter, UCNV_FROM_U_CALLBACK_STOP, 0, NULL, NULL, &status); break; case PP_CHARSET_CONVERSIONERROR_SKIP: ucnv_setFromUCallBack(converter, UCNV_FROM_U_CALLBACK_SKIP, 0, NULL, NULL, &status); break; case PP_CHARSET_CONVERSIONERROR_SUBSTITUTE: { // ICU sets the substitution char for some character sets (like latin1) // to be the ASCII "substitution character" (26). We want to use '?' // instead for backwards-compat with Windows behavior. char subst_chars[32]; int8_t subst_chars_len = 32; ucnv_getSubstChars(converter, subst_chars, &subst_chars_len, &status); if (subst_chars_len == 1 && subst_chars[0] == 26) { // Override to the question mark character if possible. When using // setSubstString, the input is a Unicode character. The function will // try to convert it to the destination character set and fail if that // can not be converted to the destination character set. // // We just ignore any failure. If the dest char set has no // representation for '?', then we'll just stick to the ICU default // substitution character. UErrorCode subst_status = U_ZERO_ERROR; UChar question_mark = '?'; ucnv_setSubstString(converter, &question_mark, 1, &subst_status); } ucnv_setFromUCallBack(converter, UCNV_FROM_U_CALLBACK_SUBSTITUTE, 0, NULL, NULL, &status); break; } default: *output_length = 0; ucnv_close(converter); return PP_FALSE; } // ucnv_fromUChars returns required size not including terminating null. *output_length = static_cast(ucnv_fromUChars( converter, output_buffer, output_buffer ? *output_length : 0, reinterpret_cast(utf16), utf16_len, &status)); ucnv_close(converter); if (status == U_BUFFER_OVERFLOW_ERROR) { // Don't treat this as a fatal error since we need to return the string // size. return PP_TRUE; } else if (!U_SUCCESS(status)) { *output_length = 0; return PP_FALSE; } return PP_TRUE; } // static uint16_t* PPB_CharSet_Shared::CharSetToUTF16Deprecated( const char* input, uint32_t input_len, const char* input_char_set, PP_CharSet_ConversionError deprecated_on_error, uint32_t* output_length) { *output_length = 0; PP_CharSet_Trusted_ConversionError on_error = DeprecatedToConversionError( deprecated_on_error); // Compute required length. uint32_t required_length = 0; CharSetToUTF16(input, input_len, input_char_set, on_error, NULL, &required_length); // Our output is null terminated, so need one more byte. uint16_t* ret_buf = static_cast( thunk::GetPPB_Memory_Dev_0_1_Thunk()->MemAlloc( (required_length + 1) * sizeof(uint16_t))); // Do the conversion into the buffer. PP_Bool result = CharSetToUTF16(input, input_len, input_char_set, on_error, ret_buf, &required_length); if (result == PP_FALSE) { thunk::GetPPB_Memory_Dev_0_1_Thunk()->MemFree(ret_buf); return NULL; } ret_buf[required_length] = 0; // Null terminate. *output_length = required_length; return ret_buf; } PP_Bool PPB_CharSet_Shared::CharSetToUTF16( const char* input, uint32_t input_len, const char* input_char_set, PP_CharSet_Trusted_ConversionError on_error, uint16_t* output_buffer, uint32_t* output_utf16_length) { if (!input || !input_char_set || !output_utf16_length) { *output_utf16_length = 0; return PP_FALSE; } base::OnStringConversionError::Type base_on_error; if (!PPToBaseConversionError(on_error, &base_on_error)) { *output_utf16_length = 0; return PP_FALSE; // Invalid enum value. } // We can convert this call to the implementation in base to avoid code // duplication, although this does introduce an extra copy of the data. base::string16 output; if (!base::CodepageToUTF16(std::string(input, input_len), input_char_set, base_on_error, &output)) { *output_utf16_length = 0; return PP_FALSE; } if (output_buffer) { memcpy(output_buffer, output.c_str(), std::min(*output_utf16_length, static_cast(output.size())) * sizeof(uint16_t)); } *output_utf16_length = static_cast(output.size()); return PP_TRUE; } } // namespace ppapi