/* * Copyright (C) 1999 Lars Knoll (knoll@kde.org) * (C) 1999 Antti Koivisto (koivisto@kde.org) * (C) 2001 Dirk Mueller ( mueller@kde.org ) * Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 Apple Inc. All rights reserved. * Copyright (C) 2006 Andrew Wellington (proton@wiretapped.net) * * This library is free software; you can redistribute it and/or * modify it under the terms of the GNU Library General Public * License as published by the Free Software Foundation; either * version 2 of the License, or (at your option) any later version. * * This library is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU * Library General Public License for more details. * * You should have received a copy of the GNU Library General Public License * along with this library; see the file COPYING.LIB. If not, write to * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, * Boston, MA 02110-1301, USA. * */ #include "config.h" #include "StringImpl.h" #include "AtomicString.h" #include "CString.h" #include "CharacterNames.h" #include "DeprecatedString.h" #include "FloatConversion.h" #include "Length.h" #include "StringBuffer.h" #include "StringHash.h" #include "TextBreakIterator.h" #include "TextEncoding.h" #include #include #include #include using namespace WTF; using namespace Unicode; #if USE(JAVASCRIPTCORE_BINDINGS) using KJS::Identifier; using KJS::UString; #endif namespace WebCore { static inline UChar* newUCharVector(unsigned n) { return static_cast(fastMalloc(sizeof(UChar) * n)); } static inline void deleteUCharVector(const UChar* p) { fastFree(const_cast(p)); } // This constructor is used only to create the empty string. StringImpl::StringImpl() : RefCounted(1) , m_length(0) , m_data(0) , m_hash(0) , m_inTable(false) , m_hasTerminatingNullCharacter(false) { } // This is one of the most common constructors, but it's also used for the copy() // operation. Because of that, it's the one constructor that doesn't assert the // length is non-zero, since we support copying the empty string. inline StringImpl::StringImpl(const UChar* characters, unsigned length) : RefCounted(1) , m_length(length) , m_hash(0) , m_inTable(false) , m_hasTerminatingNullCharacter(false) { UChar* data = newUCharVector(length); memcpy(data, characters, length * sizeof(UChar)); m_data = data; } inline StringImpl::StringImpl(const StringImpl& str, WithTerminatingNullCharacter) : RefCounted(1) , m_length(str.m_length) , m_hash(str.m_hash) , m_inTable(false) , m_hasTerminatingNullCharacter(true) { UChar* data = newUCharVector(str.m_length + 1); memcpy(data, str.m_data, str.m_length * sizeof(UChar)); data[str.m_length] = 0; m_data = data; } inline StringImpl::StringImpl(const char* characters, unsigned length) : RefCounted(1) , m_length(length) , m_hash(0) , m_inTable(false) , m_hasTerminatingNullCharacter(false) { ASSERT(characters); ASSERT(length); UChar* data = newUCharVector(length); for (unsigned i = 0; i != length; ++i) { unsigned char c = characters[i]; data[i] = c; } m_data = data; } inline StringImpl::StringImpl(UChar* characters, unsigned length, AdoptBuffer) : RefCounted(1) , m_length(length) , m_data(characters) , m_hash(0) , m_inTable(false) , m_hasTerminatingNullCharacter(false) { ASSERT(characters); ASSERT(length); } // FIXME: These AtomicString constructors return objects with a refCount of 0, // even though the others return objects with a refCount of 1. That preserves // the historical behavior for the hash map translator call sites inside the // AtomicString code, but is it correct? // This constructor is only for use by AtomicString. StringImpl::StringImpl(const UChar* characters, unsigned length, unsigned hash) : RefCounted(0) , m_length(length) , m_hash(hash) , m_inTable(true) , m_hasTerminatingNullCharacter(false) { ASSERT(hash); ASSERT(characters); ASSERT(length); UChar* data = newUCharVector(length); memcpy(data, characters, length * sizeof(UChar)); m_data = data; } // This constructor is only for use by AtomicString. StringImpl::StringImpl(const char* characters, unsigned length, unsigned hash) : RefCounted(0) , m_length(length) , m_hash(hash) , m_inTable(true) , m_hasTerminatingNullCharacter(false) { ASSERT(hash); ASSERT(characters); ASSERT(length); UChar* data = newUCharVector(length); for (unsigned i = 0; i != length; ++i) { unsigned char c = characters[i]; data[i] = c; } m_data = data; } StringImpl::~StringImpl() { if (m_inTable) AtomicString::remove(this); deleteUCharVector(m_data); } StringImpl* StringImpl::empty() { static StringImpl e; return &e; } bool StringImpl::containsOnlyWhitespace() { // FIXME: The definition of whitespace here includes a number of characters // that are not whitespace from the point of view of RenderText; I wonder if // that's a problem in practice. for (unsigned i = 0; i < m_length; i++) if (!isASCIISpace(m_data[i])) return false; return true; } PassRefPtr StringImpl::substring(unsigned pos, unsigned len) { if (pos >= m_length) return empty(); if (len > m_length - pos) len = m_length - pos; return create(m_data + pos, len); } UChar32 StringImpl::characterStartingAt(unsigned i) { if (U16_IS_SINGLE(m_data[i])) return m_data[i]; if (i + 1 < m_length && U16_IS_LEAD(m_data[i]) && U16_IS_TRAIL(m_data[i + 1])) return U16_GET_SUPPLEMENTARY(m_data[i], m_data[i + 1]); return 0; } static Length parseLength(const UChar* data, unsigned length) { if (length == 0) return Length(1, Relative); unsigned i = 0; while (i < length && isSpaceOrNewline(data[i])) ++i; if (i < length && (data[i] == '+' || data[i] == '-')) ++i; while (i < length && Unicode::isDigit(data[i])) ++i; bool ok; int r = DeprecatedConstString(reinterpret_cast(data), i).string().toInt(&ok); /* Skip over any remaining digits, we are not that accurate (5.5% => 5%) */ while (i < length && (Unicode::isDigit(data[i]) || data[i] == '.')) ++i; /* IE Quirk: Skip any whitespace (20 % => 20%) */ while (i < length && isSpaceOrNewline(data[i])) ++i; if (ok) { if (i < length) { UChar next = data[i]; if (next == '%') return Length(static_cast(r), Percent); if (next == '*') return Length(r, Relative); } return Length(r, Fixed); } else { if (i < length) { UChar next = data[i]; if (next == '*') return Length(1, Relative); if (next == '%') return Length(1, Relative); } } return Length(0, Relative); } Length StringImpl::toLength() { return parseLength(m_data, m_length); } static int countCharacter(StringImpl* string, UChar character) { int count = 0; int length = string->length(); for (int i = 0; i < length; ++i) count += (*string)[i] == character; return count; } Length* StringImpl::toCoordsArray(int& len) { StringBuffer spacified(m_length); for (unsigned i = 0; i < m_length; i++) { UChar cc = m_data[i]; if (cc > '9' || (cc < '0' && cc != '-' && cc != '*' && cc != '.')) spacified[i] = ' '; else spacified[i] = cc; } RefPtr str = adopt(spacified); str = str->simplifyWhiteSpace(); len = countCharacter(str.get(), ' ') + 1; Length* r = new Length[len]; int i = 0; int pos = 0; int pos2; while ((pos2 = str->find(' ', pos)) != -1) { r[i++] = parseLength(str->characters() + pos, pos2 - pos); pos = pos2+1; } r[i] = parseLength(str->characters() + pos, str->length() - pos); ASSERT(i == len - 1); return r; } Length* StringImpl::toLengthArray(int& len) { RefPtr str = simplifyWhiteSpace(); if (!str->length()) { len = 1; return 0; } len = countCharacter(str.get(), ',') + 1; Length* r = new Length[len]; int i = 0; int pos = 0; int pos2; while ((pos2 = str->find(',', pos)) != -1) { r[i++] = parseLength(str->characters() + pos, pos2 - pos); pos = pos2+1; } ASSERT(i == len - 1); /* IE Quirk: If the last comma is the last char skip it and reduce len by one */ if (str->length()-pos > 0) r[i] = parseLength(str->characters() + pos, str->length() - pos); else len--; return r; } bool StringImpl::isLower() { // Do a faster loop for the case where all the characters are ASCII. bool allLower = true; UChar ored = 0; for (unsigned i = 0; i < m_length; i++) { UChar c = m_data[i]; allLower = allLower && isASCIILower(c); ored |= c; } if (!(ored & ~0x7F)) return allLower; // Do a slower check for cases that include non-ASCII characters. allLower = true; unsigned i = 0; while (i < m_length) { UChar32 character; U16_NEXT(m_data, i, m_length, character) allLower = allLower && Unicode::isLower(character); } return allLower; } PassRefPtr StringImpl::lower() { StringBuffer data(m_length); int32_t length = m_length; // Do a faster loop for the case where all the characters are ASCII. UChar ored = 0; for (int i = 0; i < length; i++) { UChar c = m_data[i]; ored |= c; data[i] = toASCIILower(c); } if (!(ored & ~0x7F)) return adopt(data); // Do a slower implementation for cases that include non-ASCII characters. bool error; int32_t realLength = Unicode::toLower(data.characters(), length, m_data, m_length, &error); if (!error && realLength == length) return adopt(data); data.resize(realLength); Unicode::toLower(data.characters(), realLength, m_data, m_length, &error); if (error) return this; return adopt(data); } PassRefPtr StringImpl::upper() { bool error; int32_t length = Unicode::toUpper(0, 0, m_data, m_length, &error); StringBuffer data(length); Unicode::toUpper(data.characters(), length, m_data, m_length, &error); if (error) return this; return adopt(data); } PassRefPtr StringImpl::secure(UChar aChar) { int length = m_length; StringBuffer data(length); for (int i = 0; i < length; ++i) data[i] = aChar; return adopt(data); } PassRefPtr StringImpl::foldCase() { StringBuffer data(m_length); int32_t length = m_length; // Do a faster loop for the case where all the characters are ASCII. UChar ored = 0; for (int i = 0; i < length; i++) { UChar c = m_data[i]; ored |= c; data[i] = toASCIILower(c); } if (!(ored & ~0x7F)) return adopt(data); // Do a slower implementation for cases that include non-ASCII characters. bool error; int32_t realLength = Unicode::foldCase(data.characters(), length, m_data, m_length, &error); if (!error && realLength == length) return adopt(data); data.resize(realLength); Unicode::foldCase(data.characters(), realLength, m_data, m_length, &error); if (error) return this; return adopt(data); } PassRefPtr StringImpl::stripWhiteSpace() { if (!m_length) return empty(); unsigned start = 0; unsigned end = m_length - 1; // skip white space from start while (start <= end && isSpaceOrNewline(m_data[start])) start++; // only white space if (start > end) return empty(); // skip white space from end while (end && isSpaceOrNewline(m_data[end])) end--; return create(m_data + start, end + 1 - start); } PassRefPtr StringImpl::simplifyWhiteSpace() { StringBuffer data(m_length); const UChar* from = m_data; const UChar* fromend = from + m_length; int outc = 0; UChar* to = data.characters(); while (true) { while (from != fromend && isSpaceOrNewline(*from)) from++; while (from != fromend && !isSpaceOrNewline(*from)) to[outc++] = *from++; if (from != fromend) to[outc++] = ' '; else break; } if (outc > 0 && to[outc - 1] == ' ') outc--; data.shrink(outc); return adopt(data); } PassRefPtr StringImpl::capitalize(UChar previous) { StringBuffer stringWithPrevious(m_length + 1); stringWithPrevious[0] = previous == noBreakSpace ? ' ' : previous; for (unsigned i = 1; i < m_length + 1; i++) { // Replace with a real space since ICU no longer treats as a word separator. if (m_data[i - 1] == noBreakSpace) stringWithPrevious[i] = ' '; else stringWithPrevious[i] = m_data[i - 1]; } TextBreakIterator* boundary = wordBreakIterator(stringWithPrevious.characters(), m_length + 1); if (!boundary) return this; StringBuffer data(m_length); int32_t endOfWord; int32_t startOfWord = textBreakFirst(boundary); for (endOfWord = textBreakNext(boundary); endOfWord != TextBreakDone; startOfWord = endOfWord, endOfWord = textBreakNext(boundary)) { if (startOfWord != 0) // Ignore first char of previous string data[startOfWord - 1] = m_data[startOfWord - 1] == noBreakSpace ? noBreakSpace : toTitleCase(stringWithPrevious[startOfWord]); for (int i = startOfWord + 1; i < endOfWord; i++) data[i - 1] = m_data[i - 1]; } return adopt(data); } int StringImpl::toInt(bool* ok) { unsigned i = 0; // Allow leading spaces. for (; i != m_length; ++i) if (!isSpaceOrNewline(m_data[i])) break; // Allow sign. if (i != m_length && (m_data[i] == '+' || m_data[i] == '-')) ++i; // Allow digits. for (; i != m_length; ++i) if (!Unicode::isDigit(m_data[i])) break; return DeprecatedConstString(reinterpret_cast(m_data), i).string().toInt(ok); } int64_t StringImpl::toInt64(bool* ok) { unsigned i = 0; // Allow leading spaces. for (; i != m_length; ++i) if (!isSpaceOrNewline(m_data[i])) break; // Allow sign. if (i != m_length && (m_data[i] == '+' || m_data[i] == '-')) ++i; // Allow digits. for (; i != m_length; ++i) if (!Unicode::isDigit(m_data[i])) break; return DeprecatedConstString(reinterpret_cast(m_data), i).string().toInt64(ok); } uint64_t StringImpl::toUInt64(bool* ok) { unsigned i = 0; // Allow leading spaces. for (; i != m_length; ++i) if (!isSpaceOrNewline(m_data[i])) break; // Allow digits. for (; i != m_length; ++i) if (!Unicode::isDigit(m_data[i])) break; return DeprecatedConstString(reinterpret_cast(m_data), i).string().toUInt64(ok); } double StringImpl::toDouble(bool* ok) { if (!m_length) { if (ok) *ok = false; return 0; } char *end; CString latin1String = Latin1Encoding().encode(characters(), length()); #if USE(JAVASCRIPTCORE_BINDINGS) double val = kjs_strtod(latin1String.data(), &end); #else // V8_BINDING double val = strtod(latin1String.data(), &end); #endif if (ok) *ok = end == 0 || *end == '\0'; return val; } float StringImpl::toFloat(bool* ok) { // FIXME: This will return ok even when the string fits into a double but not a float. return narrowPrecisionToFloat(toDouble(ok)); } static bool equal(const UChar* a, const char* b, int length) { ASSERT(length >= 0); while (length--) { unsigned char bc = *b++; if (*a++ != bc) return false; } return true; } static bool equalIgnoringCase(const UChar* a, const char* b, int length) { ASSERT(length >= 0); while (length--) { unsigned char bc = *b++; if (foldCase(*a++) != foldCase(bc)) return false; } return true; } static inline bool equalIgnoringCase(const UChar* a, const UChar* b, int length) { ASSERT(length >= 0); return umemcasecmp(a, b, length) == 0; } int StringImpl::find(const char* chs, int index, bool caseSensitive) { if (!chs || index < 0) return -1; int chsLength = strlen(chs); int n = m_length - index; if (n < 0) return -1; n -= chsLength - 1; if (n <= 0) return -1; const char* chsPlusOne = chs + 1; int chsLengthMinusOne = chsLength - 1; const UChar* ptr = m_data + index - 1; if (caseSensitive) { UChar c = *chs; do { if (*++ptr == c && equal(ptr + 1, chsPlusOne, chsLengthMinusOne)) return m_length - chsLength - n + 1; } while (--n); } else { UChar lc = Unicode::foldCase(*chs); do { if (Unicode::foldCase(*++ptr) == lc && equalIgnoringCase(ptr + 1, chsPlusOne, chsLengthMinusOne)) return m_length - chsLength - n + 1; } while (--n); } return -1; } int StringImpl::find(UChar c, int start) { unsigned index = start; if (index >= m_length ) return -1; while(index < m_length) { if (m_data[index] == c) return index; index++; } return -1; } int StringImpl::find(StringImpl* str, int index, bool caseSensitive) { /* We use a simple trick for efficiency's sake. Instead of comparing strings, we compare the sum of str with that of a part of this string. Only if that matches, we call memcmp or ucstrnicmp. */ ASSERT(str); if (index < 0) index += m_length; int lstr = str->m_length; int lthis = m_length - index; if ((unsigned)lthis > m_length) return -1; int delta = lthis - lstr; if (delta < 0) return -1; const UChar* uthis = m_data + index; const UChar* ustr = str->m_data; unsigned hthis = 0; unsigned hstr = 0; if (caseSensitive) { for (int i = 0; i < lstr; i++) { hthis += uthis[i]; hstr += ustr[i]; } int i = 0; while (1) { if (hthis == hstr && memcmp(uthis + i, ustr, lstr * sizeof(UChar)) == 0) return index + i; if (i == delta) return -1; hthis += uthis[i + lstr]; hthis -= uthis[i]; i++; } } else { for (int i = 0; i < lstr; i++ ) { hthis += toASCIILower(uthis[i]); hstr += toASCIILower(ustr[i]); } int i = 0; while (1) { if (hthis == hstr && equalIgnoringCase(uthis + i, ustr, lstr)) return index + i; if (i == delta) return -1; hthis += toASCIILower(uthis[i + lstr]); hthis -= toASCIILower(uthis[i]); i++; } } } int StringImpl::reverseFind(UChar c, int index) { if (index >= (int)m_length || m_length == 0) return -1; if (index < 0) index += m_length; while (1) { if (m_data[index] == c) return index; if (index == 0) return -1; index--; } } int StringImpl::reverseFind(StringImpl* str, int index, bool caseSensitive) { /* See StringImpl::find() for explanations. */ ASSERT(str); int lthis = m_length; if (index < 0) index += lthis; int lstr = str->m_length; int delta = lthis - lstr; if ( index < 0 || index > lthis || delta < 0 ) return -1; if ( index > delta ) index = delta; const UChar *uthis = m_data; const UChar *ustr = str->m_data; unsigned hthis = 0; unsigned hstr = 0; int i; if (caseSensitive) { for ( i = 0; i < lstr; i++ ) { hthis += uthis[index + i]; hstr += ustr[i]; } i = index; while (1) { if (hthis == hstr && memcmp(uthis + i, ustr, lstr * sizeof(UChar)) == 0) return i; if (i == 0) return -1; i--; hthis -= uthis[i + lstr]; hthis += uthis[i]; } } else { for (i = 0; i < lstr; i++) { hthis += toASCIILower(uthis[index + i]); hstr += toASCIILower(ustr[i]); } i = index; while (1) { if (hthis == hstr && equalIgnoringCase(uthis + i, ustr, lstr) ) return i; if (i == 0) return -1; i--; hthis -= toASCIILower(uthis[i + lstr]); hthis += toASCIILower(uthis[i]); } } // Should never get here. return -1; } bool StringImpl::endsWith(StringImpl* m_data, bool caseSensitive) { ASSERT(m_data); int start = m_length - m_data->m_length; if (start >= 0) return (find(m_data, start, caseSensitive) == start); return false; } PassRefPtr StringImpl::replace(UChar oldC, UChar newC) { if (oldC == newC) return this; unsigned i; for (i = 0; i != m_length; ++i) if (m_data[i] == oldC) break; if (i == m_length) return this; StringBuffer data(m_length); for (i = 0; i != m_length; ++i) { UChar ch = m_data[i]; if (ch == oldC) ch = newC; data[i] = ch; } return adopt(data); } PassRefPtr StringImpl::replace(unsigned position, unsigned lengthToReplace, StringImpl* str) { position = min(position, length()); lengthToReplace = min(lengthToReplace, length() - position); unsigned lengthToInsert = str ? str->length() : 0; if (!lengthToReplace && !lengthToInsert) return this; StringBuffer buffer(length() - lengthToReplace + lengthToInsert); memcpy(buffer.characters(), characters(), position * sizeof(UChar)); if (str) memcpy(buffer.characters() + position, str->characters(), lengthToInsert * sizeof(UChar)); memcpy(buffer.characters() + position + lengthToInsert, characters() + position + lengthToReplace, (length() - position - lengthToReplace) * sizeof(UChar)); return adopt(buffer); } PassRefPtr StringImpl::replace(UChar pattern, StringImpl* replacement) { if (!replacement) return this; int repStrLength = replacement->length(); int srcSegmentStart = 0; int matchCount = 0; // Count the matches while ((srcSegmentStart = find(pattern, srcSegmentStart)) >= 0) { ++matchCount; ++srcSegmentStart; } // If we have 0 matches, we don't have to do any more work if (!matchCount) return this; StringBuffer data(m_length - matchCount + (matchCount * repStrLength)); // Construct the new data int srcSegmentEnd; int srcSegmentLength; srcSegmentStart = 0; int dstOffset = 0; while ((srcSegmentEnd = find(pattern, srcSegmentStart)) >= 0) { srcSegmentLength = srcSegmentEnd - srcSegmentStart; memcpy(data.characters() + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar)); dstOffset += srcSegmentLength; memcpy(data.characters() + dstOffset, replacement->m_data, repStrLength * sizeof(UChar)); dstOffset += repStrLength; srcSegmentStart = srcSegmentEnd + 1; } srcSegmentLength = m_length - srcSegmentStart; memcpy(data.characters() + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar)); ASSERT(dstOffset + srcSegmentLength == static_cast(data.length())); return adopt(data); } PassRefPtr StringImpl::replace(StringImpl* pattern, StringImpl* replacement) { if (!pattern || !replacement) return this; int patternLength = pattern->length(); if (!patternLength) return this; int repStrLength = replacement->length(); int srcSegmentStart = 0; int matchCount = 0; // Count the matches while ((srcSegmentStart = find(pattern, srcSegmentStart)) >= 0) { ++matchCount; srcSegmentStart += patternLength; } // If we have 0 matches, we don't have to do any more work if (!matchCount) return this; StringBuffer data(m_length + matchCount * (repStrLength - patternLength)); // Construct the new data int srcSegmentEnd; int srcSegmentLength; srcSegmentStart = 0; int dstOffset = 0; while ((srcSegmentEnd = find(pattern, srcSegmentStart)) >= 0) { srcSegmentLength = srcSegmentEnd - srcSegmentStart; memcpy(data.characters() + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar)); dstOffset += srcSegmentLength; memcpy(data.characters() + dstOffset, replacement->m_data, repStrLength * sizeof(UChar)); dstOffset += repStrLength; srcSegmentStart = srcSegmentEnd + patternLength; } srcSegmentLength = m_length - srcSegmentStart; memcpy(data.characters() + dstOffset, m_data + srcSegmentStart, srcSegmentLength * sizeof(UChar)); ASSERT(dstOffset + srcSegmentLength == static_cast(data.length())); return adopt(data); } bool equal(StringImpl* a, StringImpl* b) { return StringHash::equal(a, b); } bool equal(StringImpl* a, const char* b) { if (!a) return !b; if (!b) return !a; unsigned length = a->length(); const UChar* as = a->characters(); for (unsigned i = 0; i != length; ++i) { unsigned char bc = b[i]; if (!bc) return false; if (as[i] != bc) return false; } return !b[length]; } bool equalIgnoringCase(StringImpl* a, StringImpl* b) { return CaseFoldingHash::equal(a, b); } bool equalIgnoringCase(StringImpl* a, const char* b) { if (!a) return !b; if (!b) return !a; unsigned length = a->length(); const UChar* as = a->characters(); // Do a faster loop for the case where all the characters are ASCII. UChar ored = 0; bool equal = true; for (unsigned i = 0; i != length; ++i) { char bc = b[i]; if (!bc) return false; UChar ac = as[i]; ored |= ac; equal = equal && (toASCIILower(ac) == toASCIILower(bc)); } // Do a slower implementation for cases that include non-ASCII characters. if (ored & ~0x7F) { equal = true; for (unsigned i = 0; i != length; ++i) { unsigned char bc = b[i]; equal = equal && (foldCase(as[i]) == foldCase(bc)); } } return equal && !b[length]; } Vector StringImpl::ascii() { Vector buffer(m_length + 1); for (unsigned i = 0; i != m_length; ++i) { UChar c = m_data[i]; if ((c >= 0x20 && c < 0x7F) || c == 0x00) buffer[i] = c; else buffer[i] = '?'; } buffer[m_length] = '\0'; return buffer; } WTF::Unicode::Direction StringImpl::defaultWritingDirection() { for (unsigned i = 0; i < m_length; ++i) { WTF::Unicode::Direction charDirection = WTF::Unicode::direction(m_data[i]); if (charDirection == WTF::Unicode::LeftToRight) return WTF::Unicode::LeftToRight; if (charDirection == WTF::Unicode::RightToLeft || charDirection == WTF::Unicode::RightToLeftArabic) return WTF::Unicode::RightToLeft; } return WTF::Unicode::LeftToRight; } // This is a hot function because it's used when parsing HTML. PassRefPtr StringImpl::createStrippingNullCharacters(const UChar* characters, unsigned length) { ASSERT(characters); ASSERT(length); StringBuffer strippedCopy(length); int foundNull = 0; for (unsigned i = 0; i < length; i++) { int c = characters[i]; // more efficient than using UChar here (at least on Intel Mac OS) strippedCopy[i] = c; foundNull |= ~c; } if (!foundNull) return adoptRef(new StringImpl(strippedCopy.release(), length, AdoptBuffer())); unsigned strippedLength = 0; for (unsigned i = 0; i < length; i++) { if (int c = characters[i]) strippedCopy[strippedLength++] = c; } strippedCopy.shrink(strippedLength); return adopt(strippedCopy); } PassRefPtr StringImpl::adopt(StringBuffer& buffer) { unsigned length = buffer.length(); if (length == 0) return empty(); return adoptRef(new StringImpl(buffer.release(), length, AdoptBuffer())); } PassRefPtr StringImpl::adopt(Vector& vector) { size_t size = vector.size(); if (size == 0) return empty(); return adoptRef(new StringImpl(vector.releaseBuffer(), size, AdoptBuffer())); } PassRefPtr StringImpl::create(const UChar* characters, unsigned length) { if (!characters || !length) return empty(); return adoptRef(new StringImpl(characters, length)); } PassRefPtr StringImpl::create(const char* characters, unsigned length) { if (!characters || !length) return empty(); return adoptRef(new StringImpl(characters, length)); } PassRefPtr StringImpl::create(const char* string) { if (!string) return empty(); unsigned length = strlen(string); if (!length) return empty(); return adoptRef(new StringImpl(string, length)); } PassRefPtr StringImpl::createWithTerminatingNullCharacter(const StringImpl& string) { return adoptRef(new StringImpl(string, WithTerminatingNullCharacter())); } PassRefPtr StringImpl::copy() { return adoptRef(new StringImpl(m_data, m_length)); } } // namespace WebCore