Use the upstream version of TextCodecMac.cpp, it should satisfy all of our
needs and desires now

Review URL: http://codereview.chromium.org/6471

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@2860 0039d316-1c4b-4281-b951-d872f2087c98
author: mark@chromium.org <mark@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2008-10-03 22:15:04 +0000
committer: mark@chromium.org <mark@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2008-10-03 22:15:04 +0000
commit: e2bc4e3a7b54c26689379b01a61e72e7b86a8213 (patch)
tree: 3b09d25a140a658c9befd60d1f27261cef1873ea /webkit/pending
parent: 21b55170e4b56ea7cb85b46b6ea716e62a771e1a (diff)
download: chromium_src-e2bc4e3a7b54c26689379b01a61e72e7b86a8213.zip
chromium_src-e2bc4e3a7b54c26689379b01a61e72e7b86a8213.tar.gz
chromium_src-e2bc4e3a7b54c26689379b01a61e72e7b86a8213.tar.bz2
1 files changed, 0 insertions, 321 deletions
diff --git a/webkit/pending/TextCodecMac.cpp b/webkit/pending/TextCodecMac.cpp
deleted file mode 100644
index b55516f..0000000
--- a/webkit/pending/TextCodecMac.cpp
+++ /dev/null
@@ -1,321 +0,0 @@
-/*
- * Copyright (C) 2004, 2006, 2008 Apple Inc. All rights reserved.
- * Copyright (C) 2006 Alexey Proskuryakov <ap@nypop.com>
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY APPLE COMPUTER, INC. ``AS IS'' AND ANY
- * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE COMPUTER, INC. OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
- * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
- * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- */
-
-#include "config.h"
-#include "TextCodecMac.h"
-
-#include "CString.h"
-#include "CharacterNames.h"
-#include "CharsetData.h"
-#include "PlatformString.h"
-#include <wtf/Assertions.h>
-
-using std::auto_ptr;
-using std::min;
-
-namespace WebCore {
-
-// We need to keep this because ICU doesn't support some of the encodings that we need:
-// <http://bugs.webkit.org/show_bug.cgi?id=4195>.
-
-const size_t ConversionBufferSize = 16384;
-
-static TECObjectRef cachedConverterTEC;
-static TECTextEncodingID cachedConverterEncoding = invalidEncoding;
-
-void TextCodecMac::registerEncodingNames(EncodingNameRegistrar registrar)
-{
-    TECTextEncodingID lastEncoding = invalidEncoding;
-    const char* lastName = 0;
-
-    for (size_t i = 0; CharsetTable[i].name; ++i) {
-        if (CharsetTable[i].encoding != lastEncoding) {
-            lastEncoding = CharsetTable[i].encoding;
-            lastName = CharsetTable[i].name;
-        }
-        registrar(CharsetTable[i].name, lastName);
-    }
-}
-
-static auto_ptr<TextCodec> newTextCodecMac(const TextEncoding&, const void* additionalData)
-{
-    return auto_ptr<TextCodec>(new TextCodecMac(*static_cast<const TECTextEncodingID*>(additionalData)));
-}
-
-void TextCodecMac::registerCodecs(TextCodecRegistrar registrar)
-{
-    TECTextEncodingID lastEncoding = invalidEncoding;
-
-    for (size_t i = 0; CharsetTable[i].name; ++i)
-        if (CharsetTable[i].encoding != lastEncoding) {
-            registrar(CharsetTable[i].name, newTextCodecMac, &CharsetTable[i].encoding);
-            lastEncoding = CharsetTable[i].encoding;
-        }
-}
-
-TextCodecMac::TextCodecMac(TECTextEncodingID encoding)
-    : m_encoding(encoding)
-    , m_error(false)
-    , m_numBufferedBytes(0)
-    , m_converterTEC(0)
-{
-}
-
-TextCodecMac::~TextCodecMac()
-{
-    releaseTECConverter();
-}
-
-void TextCodecMac::releaseTECConverter() const
-{
-    if (m_converterTEC) {
-        if (cachedConverterTEC != 0)
-            TECDisposeConverter(cachedConverterTEC);
-        cachedConverterTEC = m_converterTEC;
-        cachedConverterEncoding = m_encoding;
-        m_converterTEC = 0;
-    }
-}
-
-OSStatus TextCodecMac::createTECConverter() const
-{
-    bool cachedEncodingEqual = cachedConverterEncoding == m_encoding;
-    cachedConverterEncoding = invalidEncoding;
-
-    if (cachedEncodingEqual && cachedConverterTEC) {
-        m_converterTEC = cachedConverterTEC;
-        cachedConverterTEC = 0;
-        TECClearConverterContextInfo(m_converterTEC);
-    } else {
-        OSStatus status = TECCreateConverter(&m_converterTEC, m_encoding,
-            CreateTextEncoding(kTextEncodingUnicodeDefault, kTextEncodingDefaultVariant, kUnicode16BitFormat));
-        if (status)
-            return status;
-
-        TECSetBasicOptions(m_converterTEC, kUnicodeForceASCIIRangeMask);
-    }
-    
-    return noErr;
-}
-
-OSStatus TextCodecMac::decode(const unsigned char* inputBuffer, int inputBufferLength, int& inputLength,
-    void *outputBuffer, int outputBufferLength, int& outputLength)
-{
-    OSStatus status;
-    unsigned long bytesRead = 0;
-    unsigned long bytesWritten = 0;
-
-    if (m_numBufferedBytes != 0) {
-        // Finish converting a partial character that's in our buffer.
-        
-        // First, fill the partial character buffer with as many bytes as are available.
-        ASSERT(m_numBufferedBytes < sizeof(m_bufferedBytes));
-        const int spaceInBuffer = sizeof(m_bufferedBytes) - m_numBufferedBytes;
-        const int bytesToPutInBuffer = MIN(spaceInBuffer, inputBufferLength);
-        ASSERT(bytesToPutInBuffer != 0);
-        memcpy(m_bufferedBytes + m_numBufferedBytes, inputBuffer, bytesToPutInBuffer);
-
-        // Now, do a conversion on the buffer.
-        status = TECConvertText(m_converterTEC, m_bufferedBytes, m_numBufferedBytes + bytesToPutInBuffer, &bytesRead,
-            reinterpret_cast<unsigned char*>(outputBuffer), outputBufferLength, &bytesWritten);
-        ASSERT(bytesRead <= m_numBufferedBytes + bytesToPutInBuffer);
-
-        if (status == kTECPartialCharErr && bytesRead == 0) {
-            // Handle the case where the partial character was not converted.
-            if (bytesToPutInBuffer >= spaceInBuffer) {
-                LOG_ERROR("TECConvertText gave a kTECPartialCharErr but read none of the %zu bytes in the buffer", sizeof(m_bufferedBytes));
-                m_numBufferedBytes = 0;
-                status = kTECUnmappableElementErr; // should never happen, but use this error code
-            } else {
-                // Tell the caller we read all the source bytes and keep them in the buffer.
-                m_numBufferedBytes += bytesToPutInBuffer;
-                bytesRead = bytesToPutInBuffer;
-                status = noErr;
-            }
-        } else {
-            // We are done with the partial character buffer.
-            // Also, we have read some of the bytes from the main buffer.
-            if (bytesRead > m_numBufferedBytes) {
-                bytesRead -= m_numBufferedBytes;
-            } else {
-                LOG_ERROR("TECConvertText accepted some bytes it previously rejected with kTECPartialCharErr");
-                bytesRead = 0;
-            }
-            m_numBufferedBytes = 0;
-            if (status == kTECPartialCharErr) {
-                // While there may be a partial character problem in the small buffer,
-                // we have to try again and not get confused and think there is a partial
-                // character problem in the large buffer.
-                status = noErr;
-            }
-        }
-    } else {
-        status = TECConvertText(m_converterTEC, inputBuffer, inputBufferLength, &bytesRead,
-            static_cast<unsigned char*>(outputBuffer), outputBufferLength, &bytesWritten);
-        ASSERT(static_cast<int>(bytesRead) <= inputBufferLength);
-    }
-
-    // Work around bug 3351093, where sometimes we get kTECBufferBelowMinimumSizeErr instead of kTECOutputBufferFullStatus.
-    if (status == kTECBufferBelowMinimumSizeErr && bytesWritten != 0) {
-        status = kTECOutputBufferFullStatus;
-    }
-
-    inputLength = bytesRead;
-    outputLength = bytesWritten;
-    return status;
-}
-
-String TextCodecMac::decode(const char* bytes, size_t length, bool flush)
-{
-    // Get a converter for the passed-in encoding.
-    if (!m_converterTEC && createTECConverter() != noErr)
-        return String();
-    
-    Vector<UChar> result;
-
-    const unsigned char* sourcePointer = reinterpret_cast<const unsigned char*>(bytes);
-    int sourceLength = length;
-    bool bufferWasFull = false;
-    UniChar buffer[ConversionBufferSize];
-
-    while (sourceLength || bufferWasFull) {
-        int bytesRead = 0;
-        int bytesWritten = 0;
-        OSStatus status = decode(sourcePointer, sourceLength, bytesRead, buffer, sizeof(buffer), bytesWritten);
-        ASSERT(bytesRead <= sourceLength);
-        sourcePointer += bytesRead;
-        sourceLength -= bytesRead;
-        
-        switch (status) {
-            case noErr:
-            case kTECOutputBufferFullStatus:
-                break;
-            case kTextMalformedInputErr:
-            case kTextUndefinedElementErr:
-                // FIXME: Put FFFD character into the output string in this case?
-                TECClearConverterContextInfo(m_converterTEC);
-                if (sourceLength) {
-                    sourcePointer += 1;
-                    sourceLength -= 1;
-                }
-                break;
-            case kTECPartialCharErr: {
-                // Put the partial character into the buffer.
-                ASSERT(m_numBufferedBytes == 0);
-                const int bufferSize = sizeof(m_numBufferedBytes);
-                if (sourceLength < bufferSize) {
-                    memcpy(m_bufferedBytes, sourcePointer, sourceLength);
-                    m_numBufferedBytes = sourceLength;
-                } else {
-                    LOG_ERROR("TECConvertText gave a kTECPartialCharErr, but left %u bytes in the buffer", sourceLength);
-                }
-                sourceLength = 0;
-                break;
-            }
-            default:
-                LOG_ERROR("text decoding failed with error %ld", static_cast<long>(status));
-                m_error = true;
-                return String();
-        }
-
-        ASSERT(!(bytesWritten % sizeof(UChar)));
-        result.append(buffer, bytesWritten / sizeof(UChar));
-
-        bufferWasFull = status == kTECOutputBufferFullStatus;
-    }
-    
-    if (flush) {
-        unsigned long bytesWritten = 0;
-        TECFlushText(m_converterTEC, reinterpret_cast<unsigned char*>(buffer), sizeof(buffer), &bytesWritten);
-        ASSERT(!(bytesWritten % sizeof(UChar)));
-        result.append(buffer, bytesWritten / sizeof(UChar));
-    }
-
-    String resultString = String::adopt(result);
-
-    // <rdar://problem/3225472>
-    // Simplified Chinese pages use the code A3A0 to mean "full-width space".
-    // But GB18030 decodes it to U+E5E5, which is correct in theory but not in practice.
-    // To work around, just change all occurences of U+E5E5 to U+3000 (ideographic space).
-    if (m_encoding == kCFStringEncodingGB_18030_2000)
-        resultString.replace(0xE5E5, ideographicSpace);
-    
-    return resultString;
-}
-
-CString TextCodecMac::encode(const UChar* characters, size_t length, bool allowEntities)
-{
-    // FIXME: We should really use TEC here instead of CFString for consistency with the other direction.
-
-    // FIXME: Since there's no "force ASCII range" mode in CFString, we change the backslash into a yen sign.
-    // Encoding will change the yen sign back into a backslash.
-    String copy(characters, length);
-    copy.replace('\\', m_backslashAsCurrencySymbol);
-    CFStringRef cfs = copy.createCFString();
-
-    CFIndex startPos = 0;
-    CFIndex charactersLeft = CFStringGetLength(cfs);
-    Vector<char> result;
-    size_t size = 0;
-    UInt8 lossByte = allowEntities ? 0 : '?';
-    while (charactersLeft > 0) {
-        CFRange range = CFRangeMake(startPos, charactersLeft);
-        CFIndex bufferLength;
-        CFStringGetBytes(cfs, range, m_encoding, lossByte, false, NULL, 0x7FFFFFFF, &bufferLength);
-
-        result.grow(size + bufferLength);
-        unsigned char* buffer = reinterpret_cast<unsigned char*>(result.data() + size);
-        CFIndex charactersConverted = CFStringGetBytes(cfs, range, m_encoding, lossByte, false, buffer, bufferLength, &bufferLength);
-        size += bufferLength;
-
-        if (charactersConverted != charactersLeft) {
-            unsigned badChar = CFStringGetCharacterAtIndex(cfs, startPos + charactersConverted);
-            ++charactersConverted;
-            if ((badChar & 0xFC00) == 0xD800 && charactersConverted != charactersLeft) { // is high surrogate
-                UniChar low = CFStringGetCharacterAtIndex(cfs, startPos + charactersConverted);
-                if ((low & 0xFC00) == 0xDC00) { // is low surrogate
-                    badChar <<= 10;
-                    badChar += low;
-                    badChar += 0x10000 - (0xD800 << 10) - 0xDC00;
-                    ++charactersConverted;
-                }
-            }
-            char entityBuffer[16];
-            sprintf(entityBuffer, "&#%u;", badChar);
-            size_t entityLength = strlen(entityBuffer);
-            result.grow(size + entityLength);
-            memcpy(result.data() + size, entityBuffer, entityLength);
-            size += entityLength;
-        }
-
-        startPos += charactersConverted;
-        charactersLeft -= charactersConverted;
-    }
-    CFRelease(cfs);
-    return CString(result.data(), size);
-}
-
-} // namespace WebCore
author	mark@chromium.org <mark@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2008-10-03 22:15:04 +0000
committer	mark@chromium.org <mark@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2008-10-03 22:15:04 +0000
commit	e2bc4e3a7b54c26689379b01a61e72e7b86a8213 (patch)
tree	3b09d25a140a658c9befd60d1f27261cef1873ea /webkit/pending
parent	21b55170e4b56ea7cb85b46b6ea716e62a771e1a (diff)
download	chromium_src-e2bc4e3a7b54c26689379b01a61e72e7b86a8213.zip chromium_src-e2bc4e3a7b54c26689379b01a61e72e7b86a8213.tar.gz chromium_src-e2bc4e3a7b54c26689379b01a61e72e7b86a8213.tar.bz2