diff options
author | brettw@google.com <brettw@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-11-17 17:30:19 +0000 |
---|---|---|
committer | brettw@google.com <brettw@google.com@0039d316-1c4b-4281-b951-d872f2087c98> | 2008-11-17 17:30:19 +0000 |
commit | 9b9877d2952882560e37205edf33ad460a7efa91 (patch) | |
tree | 452db327f3b534b65d8d8260213094c5d9e6d00d /base/gfx | |
parent | b6f2b91367ba6ed9ebb305233d4e786c1c748e45 (diff) | |
download | chromium_src-9b9877d2952882560e37205edf33ad460a7efa91.zip chromium_src-9b9877d2952882560e37205edf33ad460a7efa91.tar.gz chromium_src-9b9877d2952882560e37205edf33ad460a7efa91.tar.bz2 |
Debase our Uniscribe code. This moves FontUtils and all our Uniscribe code from
base/gfx to webkit/port/platform/graphics. I fixed the indenting and naming of
the moved code.
Review URL: http://codereview.chromium.org/10785
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@5561 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'base/gfx')
-rw-r--r-- | base/gfx/base_gfx.scons | 4 | ||||
-rw-r--r-- | base/gfx/font_utils.cc | 336 | ||||
-rw-r--r-- | base/gfx/font_utils.h | 87 | ||||
-rw-r--r-- | base/gfx/uniscribe.cc | 848 | ||||
-rw-r--r-- | base/gfx/uniscribe.h | 366 | ||||
-rw-r--r-- | base/gfx/uniscribe_unittest.cc | 140 |
6 files changed, 0 insertions, 1781 deletions
diff --git a/base/gfx/base_gfx.scons b/base/gfx/base_gfx.scons index 9892d66..748f13b 100644 --- a/base/gfx/base_gfx.scons +++ b/base/gfx/base_gfx.scons @@ -33,7 +33,6 @@ if env['PLATFORM'] == 'win32': input_files = [ 'convolver.cc', - 'font_utils.cc', 'gdi_util.cc', 'image_operations.cc', 'native_theme.cc', @@ -43,7 +42,6 @@ input_files = [ 'rect.cc', 'size.cc', 'skia_utils.cc', - 'uniscribe.cc', 'vector_canvas.cc', 'vector_device.cc', ] @@ -52,11 +50,9 @@ if env['PLATFORM'] in ('posix', 'darwin'): # Remove files that still need to be ported from the input_files list. # TODO(port): delete files from this list as they get ported. to_be_ported_files = [ - 'font_utils.cc', 'gdi_util.cc', 'native_theme.cc', 'skia_utils.cc', - 'uniscribe.cc', 'vector_canvas.cc', 'vector_device.cc', ] diff --git a/base/gfx/font_utils.cc b/base/gfx/font_utils.cc deleted file mode 100644 index 23c7f3a..0000000 --- a/base/gfx/font_utils.cc +++ /dev/null @@ -1,336 +0,0 @@ -// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "base/gfx/font_utils.h" - -#include <limits> -#include <map> - -#include "base/gfx/uniscribe.h" -#include "base/logging.h" -#include "base/singleton.h" -#include "base/string_util.h" -#include "unicode/locid.h" -#include "unicode/uchar.h" - -namespace gfx { - -namespace { - -// hash_map has extra cost with no sizable gain for a small number of integer -// key items. When the map size becomes much bigger (which will be later as -// more scripts are added) and this turns out to be prominent in the profile, we -// may consider switching to hash_map (or just an array if we support all the -// scripts) -typedef std::map<UScriptCode, const wchar_t*> ScriptToFontMap; - -struct ScriptToFontMapSingletonTraits - : public DefaultSingletonTraits<ScriptToFontMap> { - static ScriptToFontMap* New() { - struct FontMap { - UScriptCode script; - const wchar_t* family; - }; - - const static FontMap font_map[] = { - {USCRIPT_LATIN, L"times new roman"}, - {USCRIPT_GREEK, L"times new roman"}, - {USCRIPT_CYRILLIC, L"times new roman"}, - {USCRIPT_SIMPLIFIED_HAN, L"simsun"}, - //{USCRIPT_TRADITIONAL_HAN, L"pmingliu"}, - {USCRIPT_HIRAGANA, L"ms pgothic"}, - {USCRIPT_KATAKANA, L"ms pgothic"}, - {USCRIPT_KATAKANA_OR_HIRAGANA, L"ms pgothic"}, - {USCRIPT_HANGUL, L"gulim"}, - {USCRIPT_THAI, L"tahoma"}, - {USCRIPT_HEBREW, L"david"}, - {USCRIPT_ARABIC, L"tahoma"}, - {USCRIPT_DEVANAGARI, L"mangal"}, - {USCRIPT_BENGALI, L"vrinda"}, - {USCRIPT_GURMUKHI, L"raavi"}, - {USCRIPT_GUJARATI, L"shruti"}, - {USCRIPT_ORIYA, L"kalinga"}, - {USCRIPT_TAMIL, L"latha"}, - {USCRIPT_TELUGU, L"gautami"}, - {USCRIPT_KANNADA, L"tunga"}, - {USCRIPT_MALAYALAM, L"kartika"}, - {USCRIPT_LAO, L"dokchampa"}, - {USCRIPT_TIBETAN, L"microsoft himalaya"}, - {USCRIPT_GEORGIAN, L"sylfaen"}, - {USCRIPT_ARMENIAN, L"sylfaen"}, - {USCRIPT_ETHIOPIC, L"nyala"}, - {USCRIPT_CANADIAN_ABORIGINAL, L"euphemia"}, - {USCRIPT_CHEROKEE, L"plantagenet cherokee"}, - {USCRIPT_YI, L"microsoft yi balti"}, - {USCRIPT_SINHALA, L"iskoola pota"}, - {USCRIPT_SYRIAC, L"estrangelo edessa"}, - {USCRIPT_KHMER, L"daunpenh"}, - {USCRIPT_THAANA, L"mv boli"}, - {USCRIPT_MONGOLIAN, L"mongolian balti"}, - {USCRIPT_MYANMAR, L"padauk"}, - // For USCRIPT_COMMON, we map blocks to scripts when - // that makes sense. - }; - - ScriptToFontMap* new_instance = new ScriptToFontMap; - // Cannot recover from OOM so that there's no need to check. - for (int i = 0; i < arraysize(font_map); ++i) - (*new_instance)[font_map[i].script] = font_map[i].family; - - // Initialize the locale-dependent mapping. - // Since Chrome synchronizes the ICU default locale with its UI locale, - // this ICU locale tells the current UI locale of Chrome. - Locale locale = Locale::getDefault(); - ScriptToFontMap::const_iterator iter; - if (locale == Locale::getJapanese()) { - iter = new_instance->find(USCRIPT_HIRAGANA); - } else if (locale == Locale::getKorean()) { - iter = new_instance->find(USCRIPT_HANGUL); - } else { - // Use Simplified Chinese font for all other locales including - // Traditional Chinese because Simsun (SC font) has a wider - // coverage (covering both SC and TC) than PMingLiu (TC font). - // This also speeds up the TC version of Chrome when rendering SC pages. - iter = new_instance->find(USCRIPT_SIMPLIFIED_HAN); - } - if (iter != new_instance->end()) - (*new_instance)[USCRIPT_HAN] = iter->second; - - return new_instance; - } -}; - -Singleton<ScriptToFontMap, ScriptToFontMapSingletonTraits> script_font_map; - -const int kUndefinedAscent = std::numeric_limits<int>::min(); - -// Given an HFONT, return the ascent. If GetTextMetrics fails, -// kUndefinedAscent is returned, instead. -int GetAscent(HFONT hfont) { - HDC dc = GetDC(NULL); - HGDIOBJ oldFont = SelectObject(dc, hfont); - TEXTMETRIC tm; - BOOL got_metrics = GetTextMetrics(dc, &tm); - SelectObject(dc, oldFont); - ReleaseDC(NULL, dc); - return got_metrics ? tm.tmAscent : kUndefinedAscent; -} - -struct FontData { - FontData() : hfont(NULL), ascent(kUndefinedAscent), script_cache(NULL) {} - HFONT hfont; - int ascent; - mutable SCRIPT_CACHE script_cache; -}; - -// Again, using hash_map does not earn us much here. -// page_cycler_test intl2 gave us a 'better' result with map than with hash_map -// even though they're well-within 1-sigma of each other so that the difference -// is not significant. On the other hand, some pages in intl2 seem to -// take longer to load with map in the 1st pass. Need to experiment further. -typedef std::map<std::wstring, FontData*> FontDataCache; -struct FontDataCacheSingletonTraits - : public DefaultSingletonTraits<FontDataCache> { - static void Delete(FontDataCache* cache) { - FontDataCache::iterator iter = cache->begin(); - while (iter != cache->end()) { - SCRIPT_CACHE script_cache = iter->second->script_cache; - if (script_cache) - ScriptFreeCache(&script_cache); - delete iter->second; - ++iter; - } - delete cache; - } -}; - -} // namespace - -// TODO(jungshik) : this is font fallback code version 0.1 -// - Cover all the scripts -// - Get the default font for each script/generic family from the -// preference instead of hardcoding in the source. -// (at least, read values from the registry for IE font settings). -// - Support generic families (from FontDescription) -// - If the default font for a script is not available, -// try some more fonts known to support it. Finally, we can -// use EnumFontFamilies or similar APIs to come up with a list of -// fonts supporting the script and cache the result. -// - Consider using UnicodeSet (or UnicodeMap) converted from -// GLYPHSET (BMP) or directly read from truetype cmap tables to -// keep track of which character is supported by which font -// - Update script_font_cache in response to WM_FONTCHANGE - -const wchar_t* GetFontFamilyForScript(UScriptCode script, - GenericFamilyType generic) { - ScriptToFontMap::const_iterator iter = script_font_map->find(script); - const wchar_t* family = NULL; - if (iter != script_font_map->end()) { - family = iter->second; - } - return family; -} - -// TODO(jungshik) -// - Handle 'Inherited', 'Common' and 'Unknown' -// (see http://www.unicode.org/reports/tr24/#Usage_Model ) -// For 'Inherited' and 'Common', perhaps we need to -// accept another parameter indicating the previous family -// and just return it. -// - All the characters (or characters up to the point a single -// font can cover) need to be taken into account -const wchar_t* GetFallbackFamily(const wchar_t *characters, - int length, - GenericFamilyType generic, - UChar32 *char_checked, - UScriptCode *script_checked) { - DCHECK(characters && characters[0] && length > 0); - UScriptCode script = USCRIPT_COMMON; - - // Sometimes characters common to script (e.g. space) is at - // the beginning of a string so that we need to skip them - // to get a font required to render the string. - int i = 0; - UChar32 ucs4 = 0; - while (i < length && script == USCRIPT_COMMON || - script == USCRIPT_INVALID_CODE) { - U16_NEXT(characters, i, length, ucs4); - UErrorCode err = U_ZERO_ERROR; - script = uscript_getScript(ucs4, &err); - // silently ignore the error - } - - // hack for full width ASCII. For the full-width ASCII, use the font - // for Han (which is locale-dependent). - if (0xFF00 < ucs4 && ucs4 < 0xFF5F) - script = USCRIPT_HAN; - - // There are a lot of characters in USCRIPT_COMMON that can be covered - // by fonts for scripts closely related to them. - // See http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Script=Common:] - // TODO(jungshik): make this more efficient with a wider coverage - if (script == USCRIPT_COMMON || script == USCRIPT_INHERITED) { - UBlockCode block = ublock_getCode(ucs4); - switch (block) { - case UBLOCK_BASIC_LATIN: - script = USCRIPT_LATIN; - break; - case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION: - script = USCRIPT_HAN; - break; - case UBLOCK_HIRAGANA: - case UBLOCK_KATAKANA: - script = USCRIPT_HIRAGANA; - break; - case UBLOCK_ARABIC: - script = USCRIPT_ARABIC; - break; - case UBLOCK_GREEK: - script = USCRIPT_GREEK; - break; - case UBLOCK_DEVANAGARI: - // For Danda and Double Danda (U+0964, U+0965), use a Devanagari - // font for now although they're used by other scripts as well. - // Without a context, we can't do any better. - script = USCRIPT_DEVANAGARI; - break; - case UBLOCK_ARMENIAN: - script = USCRIPT_ARMENIAN; - break; - case UBLOCK_GEORGIAN: - script = USCRIPT_GEORGIAN; - break; - case UBLOCK_KANNADA: - script = USCRIPT_KANNADA; - break; - } - } - - // Another lame work-around to cover non-BMP characters. - const wchar_t* family = GetFontFamilyForScript(script, generic); - if (!family) { - int plane = ucs4 >> 16; - switch (plane) { - case 1: - family = L"code2001"; - break; - case 2: - family = L"simsun-extb"; - break; - default: - family = L"lucida sans unicode"; - } - } - - if (char_checked) *char_checked = ucs4; - if (script_checked) *script_checked = script; - return family; -} - - - -// Be aware that this is not thread-safe. -bool GetDerivedFontData(const wchar_t *family, - int style, - LOGFONT *logfont, - int *ascent, - HFONT *hfont, - SCRIPT_CACHE **script_cache) { - DCHECK(logfont && family && *family); - // Using |Singleton| here is not free, but the intl2 page cycler test - // does not show any noticeable difference with and without it. Leaking - // the contents of FontDataCache (especially SCRIPT_CACHE) at the end - // of a renderer process may not be a good idea. We may use - // atexit(). However, with no noticeable performance difference, |Singleton| - // is cleaner, I believe. - FontDataCache* font_data_cache = - Singleton<FontDataCache, FontDataCacheSingletonTraits>::get(); - // TODO(jungshik) : This comes up pretty high in the profile so that - // we need to measure whether using SHA256 (after coercing all the - // fields to char*) is faster than StringPrintf. - std::wstring font_key = StringPrintf(L"%1d:%d:%ls", style, logfont->lfHeight, - family); - FontDataCache::const_iterator iter = font_data_cache->find(font_key); - FontData *derived; - if (iter == font_data_cache->end()) { - DCHECK(wcslen(family) < LF_FACESIZE); - wcscpy_s(logfont->lfFaceName, LF_FACESIZE, family); - // TODO(jungshik): CreateFontIndirect always comes up with - // a font even if there's no font matching the name. Need to - // check it against what we actually want (as is done in FontCacheWin.cpp) - derived = new FontData; - derived->hfont = CreateFontIndirect(logfont); - // GetAscent may return kUndefinedAscent, but we still want to - // cache it so that we won't have to call CreateFontIndirect once - // more for HFONT next time. - derived->ascent = GetAscent(derived->hfont); - (*font_data_cache)[font_key] = derived; - } else { - derived = iter->second; - // Last time, GetAscent failed so that only HFONT was - // cached. Try once more assuming that TryPreloadFont - // was called by a caller between calls. - if (kUndefinedAscent == derived->ascent) - derived->ascent = GetAscent(derived->hfont); - } - *hfont = derived->hfont; - *ascent = derived->ascent; - *script_cache = &(derived->script_cache); - return *ascent != kUndefinedAscent; -} - -int GetStyleFromLogfont(const LOGFONT* logfont) { - // TODO(jungshik) : consider defining UNDEFINED or INVALID for style and - // returning it when logfont is NULL - if (!logfont) { - NOTREACHED(); - return FONT_STYLE_NORMAL; - } - return (logfont->lfItalic ? FONT_STYLE_ITALIC : FONT_STYLE_NORMAL) | - (logfont->lfUnderline ? FONT_STYLE_UNDERLINED : FONT_STYLE_NORMAL) | - (logfont->lfWeight >= 700 ? FONT_STYLE_BOLD : FONT_STYLE_NORMAL); -} - -} // namespace gfx - diff --git a/base/gfx/font_utils.h b/base/gfx/font_utils.h deleted file mode 100644 index f00db46..0000000 --- a/base/gfx/font_utils.h +++ /dev/null @@ -1,87 +0,0 @@ -// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. -// -// A collection of utilities for font handling. - -#ifndef BASE_GFX_FONT_UTILS_H__ -#define BASE_GFX_FONT_UTILS_H__ - -#include <usp10.h> -#include <wchar.h> -#include <windows.h> - -#include <unicode/uscript.h> - -namespace gfx { - -// The order of family types needs to be exactly the same as -// WebCore::FontDescription::GenericFamilyType. We may lift that restriction -// when we make webkit_glue::WebkitGenericToChromeGenericFamily more -// intelligent. -enum GenericFamilyType { - GENERIC_FAMILY_NONE = 0, - GENERIC_FAMILY_STANDARD, - GENERIC_FAMILY_SERIF, - GENERIC_FAMILY_SANSSERIF, - GENERIC_FAMILY_MONOSPACE, - GENERIC_FAMILY_CURSIVE, - GENERIC_FAMILY_FANTASY -}; - -// Return a font family that supports a script and belongs to |generic| font family. -// It can return NULL and a caller has to implement its own fallback. -const wchar_t* GetFontFamilyForScript(UScriptCode script, - GenericFamilyType generic); - -// Return a font family that can render |characters| based on -// what script characters belong to. When char_checked is non-NULL, -// it's filled with the character used to determine the script. -// When script_checked is non-NULL, the script used to determine -// the family is returned. -// TODO(jungshik) : This function needs a total overhaul. -const wchar_t* GetFallbackFamily(const wchar_t* characters, - int length, - GenericFamilyType generic, - UChar32 *char_checked, - UScriptCode *script_checked); -// Derive a new HFONT by replacing lfFaceName of LOGFONT with |family|, -// calculate the ascent for the derived HFONT, and initialize SCRIPT_CACHE -// in FontData. -// |style| is only used for cache key generation. |style| is -// bit-wise OR of BOLD(1), UNDERLINED(2) and ITALIC(4) and -// should match what's contained in LOGFONT. It should be calculated -// by calling GetStyleFromLogFont. -// Returns false if the font is not accessible, in which case |ascent| field -// of |fontdata| is set to kUndefinedAscent. -// Be aware that this is not thread-safe. -// TODO(jungshik): Instead of having three out params, we'd better have one -// (|*FontData|), but somehow it mysteriously messes up the layout for -// certain complex script pages (e.g. hi.wikipedia.org) and also crashes -// at the start-up if recently visited page list includes pages with complex -// scripts in their title. Moreover, somehow the very first-pass of -// intl2 page-cycler test is noticeably slower with one out param than -// the current version although the subsequent 9 passes take about the -// same time. -bool GetDerivedFontData(const wchar_t *family, - int style, - LOGFONT *logfont, - int *ascent, - HFONT *hfont, - SCRIPT_CACHE **script_cache); - -enum { - FONT_STYLE_NORMAL = 0, - FONT_STYLE_BOLD = 1, - FONT_STYLE_ITALIC = 2, - FONT_STYLE_UNDERLINED = 4 -}; - -// Derive style (bit-wise OR of FONT_STYLE_BOLD, FONT_STYLE_UNDERLINED, and -// FONT_STYLE_ITALIC) from LOGFONT. Returns 0 if |*logfont| is NULL. -int GetStyleFromLogfont(const LOGFONT *logfont); - -} // namespace gfx - -#endif // BASE_GFX_FONT_UTILS_H__ - diff --git a/base/gfx/uniscribe.cc b/base/gfx/uniscribe.cc deleted file mode 100644 index bdf4154..0000000 --- a/base/gfx/uniscribe.cc +++ /dev/null @@ -1,848 +0,0 @@ -// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include <windows.h> - -#include "base/gfx/uniscribe.h" - -#include "base/gfx/font_utils.h" -#include "base/logging.h" - -namespace gfx { - -// This function is used to see where word spacing should be applied inside -// runs. Note that this must match Font::treatAsSpace so we all agree where -// and how much space this is, so we don't want to do more general Unicode -// "is this a word break" thing. -static bool TreatAsSpace(wchar_t c) { - return c == ' ' || c == '\t' || c == '\n' || c == 0x00A0; -} - -// SCRIPT_FONTPROPERTIES contains glyph indices for default, invalid -// and blank glyphs. Just because ScriptShape succeeds does not mean -// that a text run is rendered correctly. Some characters may be rendered -// with default/invalid/blank glyphs. Therefore, we need to check if the glyph -// array returned by ScriptShape contains any of those glyphs to make -// sure that the text run is rendered successfully. -static bool ContainsMissingGlyphs(WORD *glyphs, - int length, - SCRIPT_FONTPROPERTIES* properties) { - for (int i = 0; i < length; ++i) { - if (glyphs[i] == properties->wgDefault || - (glyphs[i] == properties->wgInvalid && glyphs[i] != properties->wgBlank)) - return true; - } - - return false; -} - -// HFONT is the 'incarnation' of 'everything' about font, but it's an opaque -// handle and we can't directly query it to make a new HFONT sharing -// its characteristics (height, style, etc) except for family name. -// This function uses GetObject to convert HFONT back to LOGFONT, -// resets the fields of LOGFONT and calculates style to use later -// for the creation of a font identical to HFONT other than family name. -static void SetLogFontAndStyle(HFONT hfont, LOGFONT *logfont, int *style) { - DCHECK(hfont && logfont); - if (!hfont || !logfont) - return; - - GetObject(hfont, sizeof(LOGFONT), logfont); - // We reset these fields to values appropriate for CreateFontIndirect. - // while keeping lfHeight, which is the most important value in creating - // a new font similar to hfont. - logfont->lfWidth = 0; - logfont->lfEscapement = 0; - logfont->lfOrientation = 0; - logfont->lfCharSet = DEFAULT_CHARSET; - logfont->lfOutPrecision = OUT_TT_ONLY_PRECIS; - logfont->lfQuality = DEFAULT_QUALITY; // Honor user's desktop settings. - logfont->lfPitchAndFamily = DEFAULT_PITCH | FF_DONTCARE; - if (style) - *style = gfx::GetStyleFromLogfont(logfont); -} - -UniscribeState::UniscribeState(const wchar_t* input, - int input_length, - bool is_rtl, - HFONT hfont, - SCRIPT_CACHE* script_cache, - SCRIPT_FONTPROPERTIES* font_properties) - : input_(input), - input_length_(input_length), - is_rtl_(is_rtl), - hfont_(hfont), - script_cache_(script_cache), - font_properties_(font_properties), - directional_override_(false), - inhibit_ligate_(false), - letter_spacing_(0), - space_width_(0), - word_spacing_(0), - ascent_(0) { - logfont_.lfFaceName[0] = 0; -} - -UniscribeState::~UniscribeState() { -} - -void UniscribeState::InitWithOptionalLengthProtection(bool length_protection) { - // We cap the input length and just don't do anything. We'll allocate a lot - // of things of the size of the number of characters, so the allocated memory - // will be several times the input length. Plus shaping such a large buffer - // may be a form of denial of service. No legitimate text should be this long. - // It also appears that Uniscribe flatly rejects very long strings, so we - // don't lose anything by doing this. - // - // The input length protection may be disabled by the unit tests to cause - // an error condition. - static const int kMaxInputLength = 65535; - if (input_length_ == 0 || - (length_protection && input_length_ > kMaxInputLength)) - return; - - FillRuns(); - FillShapes(); - FillScreenOrder(); -} - -int UniscribeState::Width() const { - int width = 0; - for (int item_index = 0; item_index < static_cast<int>(runs_->size()); - item_index++) { - width += AdvanceForItem(item_index); - } - return width; -} - -void UniscribeState::Justify(int additional_space) { - // Count the total number of glyphs we have so we know how big to make the - // buffers below. - int total_glyphs = 0; - for (size_t run = 0; run < runs_->size(); run++) { - int run_idx = screen_order_[run]; - total_glyphs += static_cast<int>(shapes_[run_idx].glyph_length()); - } - if (total_glyphs == 0) - return; // Nothing to do. - - // We make one big buffer in screen order of all the glyphs we are drawing - // across runs so that the justification function will adjust evenly across - // all glyphs. - StackVector<SCRIPT_VISATTR, 64> visattr; - visattr->resize(total_glyphs); - StackVector<int, 64> advances; - advances->resize(total_glyphs); - StackVector<int, 64> justify; - justify->resize(total_glyphs); - - // Build the packed input. - int dest_index = 0; - for (size_t run = 0; run < runs_->size(); run++) { - int run_idx = screen_order_[run]; - const Shaping& shaping = shapes_[run_idx]; - - for (int i = 0; i < shaping.glyph_length(); i++, dest_index++) { - memcpy(&visattr[dest_index], &shaping.visattr[i], sizeof(SCRIPT_VISATTR)); - advances[dest_index] = shaping.advance[i]; - } - } - - // The documentation for ScriptJustify is wrong, the parameter is the space - // to add and not the width of the column you want. - const int min_kashida = 1; // How do we decide what this should be? - ScriptJustify(&visattr[0], &advances[0], total_glyphs, additional_space, - min_kashida, &justify[0]); - - // Now we have to unpack the justification amounts back into the runs so - // the glyph indices match. - int global_glyph_index = 0; - for (size_t run = 0; run < runs_->size(); run++) { - int run_idx = screen_order_[run]; - Shaping& shaping = shapes_[run_idx]; - - shaping.justify->resize(shaping.glyph_length()); - for (int i = 0; i < shaping.glyph_length(); i++, global_glyph_index++) - shaping.justify[i] = justify[global_glyph_index]; - } -} - -int UniscribeState::CharacterToX(int offset) const { - HRESULT hr; - DCHECK(offset <= input_length_); - - // Our algorithm is to traverse the items in screen order from left to - // right, adding in each item's screen width until we find the item with - // the requested character in it. - int width = 0; - for (size_t screen_idx = 0; screen_idx < runs_->size(); screen_idx++) { - // Compute the length of this run. - int item_idx = screen_order_[screen_idx]; - const SCRIPT_ITEM& item = runs_[item_idx]; - const Shaping& shaping = shapes_[item_idx]; - int item_length = shaping.char_length(); - - if (offset >= item.iCharPos && offset <= item.iCharPos + item_length) { - // Character offset is in this run. - int char_len = offset - item.iCharPos; - - int cur_x = 0; - hr = ScriptCPtoX(char_len, FALSE, item_length, shaping.glyph_length(), - &shaping.logs[0], &shaping.visattr[0], - shaping.effective_advances(), &item.a, &cur_x); - if (FAILED(hr)) - return 0; - - width += cur_x + shaping.pre_padding; - DCHECK(width >= 0); - return width; - } - - // Move to the next item. - width += AdvanceForItem(item_idx); - } - DCHECK(width >= 0); - return width; -} - -int UniscribeState::XToCharacter(int x) const { - // We iterate in screen order until we find the item with the given pixel - // position in it. When we find that guy, we ask Uniscribe for the - // character index. - HRESULT hr; - for (size_t screen_idx = 0; screen_idx < runs_->size(); screen_idx++) { - int item_idx = screen_order_[screen_idx]; - int advance_for_item = AdvanceForItem(item_idx); - - // Note that the run may be empty if shaping failed, so we want to skip - // over it. - const Shaping& shaping = shapes_[item_idx]; - int item_length = shaping.char_length(); - if (x <= advance_for_item && item_length > 0) { - // The requested offset is within this item. - const SCRIPT_ITEM& item = runs_[item_idx]; - - // Account for the leading space we've added to this run that Uniscribe - // doesn't know about. - x -= shaping.pre_padding; - - int char_x = 0; - int trailing; - hr = ScriptXtoCP(x, item_length, shaping.glyph_length(), - &shaping.logs[0], &shaping.visattr[0], - shaping.effective_advances(), &item.a, &char_x, - &trailing); - - // The character offset is within the item. We need to add the item's - // offset to transform it into the space of the TextRun - return char_x + item.iCharPos; - } - - // The offset is beyond this item, account for its length and move on. - x -= advance_for_item; - } - - // Error condition, we don't know what to do if we don't have that X - // position in any of our items. - return 0; -} - -void UniscribeState::Draw(HDC dc, int x, int y, int from, int to) { - HGDIOBJ old_font = 0; - int cur_x = x; - bool first_run = true; - - for (size_t screen_idx = 0; screen_idx < runs_->size(); screen_idx++) { - int item_idx = screen_order_[screen_idx]; - const SCRIPT_ITEM& item = runs_[item_idx]; - const Shaping& shaping = shapes_[item_idx]; - - // Character offsets within this run. THESE MAY NOT BE IN RANGE and may - // be negative, etc. The code below handles this. - int from_char = from - item.iCharPos; - int to_char = to - item.iCharPos; - - // See if we need to draw any characters in this item. - if (shaping.char_length() == 0 || - from_char >= shaping.char_length() || to_char <= 0) { - // No chars in this item to display. - cur_x += AdvanceForItem(item_idx); - continue; - } - - // Compute the starting glyph within this span. |from| and |to| are - // global offsets that may intersect arbitrarily with our local run. - int from_glyph, after_glyph; - if (item.a.fRTL) { - // To compute the first glyph when going RTL, we use |to|. - if (to_char >= shaping.char_length()) { - // The end of the text is after (to the left) of us. - from_glyph = 0; - } else { - // Since |to| is exclusive, the first character we draw on the left - // is actually the one right before (to the right) of |to|. - from_glyph = shaping.logs[to_char - 1]; - } - - // The last glyph is actually the first character in the range. - if (from_char <= 0) { - // The first character to draw is before (to the right) of this span, - // so draw all the way to the end. - after_glyph = shaping.glyph_length(); - } else { - // We want to draw everything up until the character to the right of - // |from|. To the right is - 1, so we look that up (remember our - // character could be more than one glyph, so we can't look up our - // glyph and add one). - after_glyph = shaping.logs[from_char - 1]; - } - } else { - // Easy case, everybody agrees about directions. We only need to handle - // boundary conditions to get a range inclusive at the beginning, and - // exclusive at the ending. We have to do some computation to see the - // glyph one past the end. - from_glyph = shaping.logs[from_char < 0 ? 0 : from_char]; - if (to_char >= shaping.char_length()) - after_glyph = shaping.glyph_length(); - else - after_glyph = shaping.logs[to_char]; - } - - // Account for the characters that were skipped in this run. When - // WebKit asks us to draw a subset of the run, it actually tells us - // to draw at the X offset of the beginning of the run, since it - // doesn't know the internal position of any of our characters. - const int* effective_advances = shaping.effective_advances(); - int inner_offset = 0; - for (int i = 0; i < from_glyph; i++) - inner_offset += effective_advances[i]; - - // Actually draw the glyphs we found. - int glyph_count = after_glyph - from_glyph; - if (from_glyph >= 0 && glyph_count > 0) { - // Account for the preceeding space we need to add to this run. We don't - // need to count for the following space because that will be counted - // in AdvanceForItem below when we move to the next run. - inner_offset += shaping.pre_padding; - - // Pass NULL in when there is no justification. - const int* justify = shaping.justify->empty() ? - NULL : &shaping.justify[from_glyph]; - - if (first_run) { - old_font = SelectObject(dc, shaping.hfont_); - first_run = false; - } else { - SelectObject(dc, shaping.hfont_); - } - - // TODO(brettw) bug 698452: if a half a character is selected, - // we should set up a clip rect so we draw the half of the glyph - // correctly. - // Fonts with different ascents can be used to render different runs. - // 'Across-runs' y-coordinate correction needs to be adjusted - // for each font. - HRESULT hr = S_FALSE; - for (int executions = 0; executions < 2; ++executions) { - hr = ScriptTextOut(dc, shaping.script_cache_, cur_x + inner_offset, - y - shaping.ascent_offset_, 0, NULL, &item.a, NULL, - 0, &shaping.glyphs[from_glyph], - glyph_count, &shaping.advance[from_glyph], - justify, &shaping.offsets[from_glyph]); - if (S_OK != hr && 0 == executions) { - // If this ScriptTextOut is called from the renderer it might fail - // because the sandbox is preventing it from opening the font files. - // If we are running in the renderer, TryToPreloadFont is overridden - // to ask the browser to preload the font for us so we can access it. - TryToPreloadFont(shaping.hfont_); - continue; - } - break; - } - - DCHECK(S_OK == hr); - - - } - - cur_x += AdvanceForItem(item_idx); - } - - if (old_font) - SelectObject(dc, old_font); -} - -WORD UniscribeState::FirstGlyphForCharacter(int char_offset) const { - // Find the run for the given character. - for (int i = 0; i < static_cast<int>(runs_->size()); i++) { - int first_char = runs_[i].iCharPos; - const Shaping& shaping = shapes_[i]; - int local_offset = char_offset - first_char; - if (local_offset >= 0 && local_offset < shaping.char_length()) { - // The character is in this run, return the first glyph for it (should - // generally be the only glyph). It seems Uniscribe gives glyph 0 for - // empty, which is what we want to return in the "missing" case. - size_t glyph_index = shaping.logs[local_offset]; - if (glyph_index >= shaping.glyphs->size()) { - // The glyph should be in this run, but the run has too few actual - // characters. This can happen when shaping the run fails, in which - // case, we should have no data in the logs at all. - DCHECK(shaping.glyphs->empty()); - return 0; - } - return shaping.glyphs[glyph_index]; - } - } - return 0; -} - -void UniscribeState::FillRuns() { - HRESULT hr; - runs_->resize(UNISCRIBE_STATE_STACK_RUNS); - - SCRIPT_STATE input_state; - input_state.uBidiLevel = is_rtl_; - input_state.fOverrideDirection = directional_override_; - input_state.fInhibitSymSwap = false; - input_state.fCharShape = false; // Not implemented in Uniscribe - input_state.fDigitSubstitute = false; // Do we want this for Arabic? - input_state.fInhibitLigate = inhibit_ligate_; - input_state.fDisplayZWG = false; // Don't draw control characters. - input_state.fArabicNumContext = is_rtl_; // Do we want this for Arabic? - input_state.fGcpClusters = false; - input_state.fReserved = 0; - input_state.fEngineReserved = 0; - // The psControl argument to ScriptItemize should be non-NULL for RTL text, - // per http://msdn.microsoft.com/en-us/library/ms776532.aspx . So use a - // SCRIPT_CONTROL that is set to all zeros. Zero as a locale ID means the - // neutral locale per http://msdn.microsoft.com/en-us/library/ms776294.aspx . - static SCRIPT_CONTROL input_control = {0, // uDefaultLanguage :16; - 0, // fContextDigits :1; - 0, // fInvertPreBoundDir :1; - 0, // fInvertPostBoundDir :1; - 0, // fLinkStringBefore :1; - 0, // fLinkStringAfter :1; - 0, // fNeutralOverride :1; - 0, // fNumericOverride :1; - 0, // fLegacyBidiClass :1; - 0, // fMergeNeutralItems :1; - 0};// fReserved :7; - // Calling ScriptApplyDigitSubstitution( NULL, &input_control, &input_state) - // here would be appropriate if we wanted to set the language ID, and get - // local digit substitution behavior. For now, don't do it. - - while (true) { - int num_items = 0; - - // Ideally, we would have a way to know the runs before and after this - // one, and put them into the control parameter of ScriptItemize. This - // would allow us to shape characters properly that cross style - // boundaries (WebKit bug 6148). - // - // We tell ScriptItemize that the output list of items is one smaller - // than it actually is. According to Mozilla bug 366643, if there is - // not enough room in the array on pre-SP2 systems, ScriptItemize will - // write one past the end of the buffer. - // - // ScriptItemize is very strange. It will often require a much larger - // ITEM buffer internally than it will give us as output. For example, - // it will say a 16-item buffer is not big enough, and will write - // interesting numbers into all those items. But when we give it a 32 - // item buffer and it succeeds, it only has one item output. - // - // It seems to be doing at least two passes, the first where it puts a - // lot of intermediate data into our items, and the second where it - // collates them. - hr = ScriptItemize(input_, input_length_, - static_cast<int>(runs_->size()) - 1, &input_control, &input_state, - &runs_[0], &num_items); - if (SUCCEEDED(hr)) { - runs_->resize(num_items); - break; - } - if (hr != E_OUTOFMEMORY) { - // Some kind of unexpected error. - runs_->resize(0); - break; - } - // There was not enough items for it to write into, expand. - runs_->resize(runs_->size() * 2); - } - - // Fix up the directions of the items so they're what WebKit thinks - // they are. WebKit (and we assume any other caller) always knows what - // direction it wants things to be in, and will only give us runs that are in - // the same direction. Sometimes, Uniscibe disagrees, for example, if you - // have embedded ASCII punctuation in an Arabic string, WebKit will - // (correctly) know that is should still be rendered RTL, but Uniscibe might - // think LTR is better. - // - // TODO(brettw) bug 747235: - // This workaround fixes the bug but causes spacing problems in other cases. - // WebKit sometimes gives us a big run that includes ASCII and Arabic, and - // this forcing direction makes those cases incorrect. This seems to happen - // during layout only, so it ends up that spacing is incorrect (because being - // the wrong direction changes ligatures and stuff). - // - //for (size_t i = 0; i < runs_->size(); i++) - // runs_[i].a.fRTL = is_rtl_; -} - - -bool UniscribeState::Shape(const wchar_t* input, - int item_length, - int num_glyphs, - SCRIPT_ITEM& run, - Shaping& shaping) { - HFONT hfont = hfont_; - SCRIPT_CACHE* script_cache = script_cache_; - SCRIPT_FONTPROPERTIES* font_properties = font_properties_; - int ascent = ascent_; - HDC temp_dc = NULL; - HGDIOBJ old_font = 0; - HRESULT hr; - bool lastFallbackTried = false; - bool result; - - int generated_glyphs = 0; - - // In case HFONT passed in ctor cannot render this run, we have to scan - // other fonts from the beginning of the font list. - ResetFontIndex(); - - // Compute shapes. - while (true) { - shaping.logs->resize(item_length); - shaping.glyphs->resize(num_glyphs); - shaping.visattr->resize(num_glyphs); - - // Firefox sets SCRIPT_ANALYSIS.SCRIPT_STATE.fDisplayZWG to true - // here. Is that what we want? It will display control characters. - hr = ScriptShape(temp_dc, script_cache, input, item_length, - num_glyphs, &run.a, - &shaping.glyphs[0], &shaping.logs[0], - &shaping.visattr[0], &generated_glyphs); - if (hr == E_PENDING) { - // Allocate the DC. - temp_dc = GetDC(NULL); - old_font = SelectObject(temp_dc, hfont); - continue; - } else if (hr == E_OUTOFMEMORY) { - num_glyphs *= 2; - continue; - } else if (SUCCEEDED(hr) && - (lastFallbackTried || !ContainsMissingGlyphs(&shaping.glyphs[0], - generated_glyphs, font_properties))) { - break; - } - - // The current font can't render this run. clear DC and try - // next font. - if (temp_dc) { - SelectObject(temp_dc, old_font); - ReleaseDC(NULL, temp_dc); - temp_dc = NULL; - } - - if (NextWinFontData(&hfont, &script_cache, &font_properties, &ascent)) { - // The primary font does not support this run. Try next font. - // In case of web page rendering, they come from fonts specified in - // CSS stylesheets. - continue; - } else if (!lastFallbackTried) { - lastFallbackTried = true; - - // Generate a last fallback font based on the script of - // a character to draw while inheriting size and styles - // from the primary font - if (!logfont_.lfFaceName[0]) - SetLogFontAndStyle(hfont_, &logfont_, &style_); - - // TODO(jungshik): generic type should come from webkit for - // UniscribeStateTextRun (a derived class used in webkit). - const wchar_t *family = GetFallbackFamily(input, item_length, - GENERIC_FAMILY_STANDARD, NULL, NULL); - bool font_ok = GetDerivedFontData(family, style_, &logfont_, &ascent, &hfont, &script_cache); - - if (!font_ok) { - // If this GetDerivedFontData is called from the renderer it might fail - // because the sandbox is preventing it from opening the font files. - // If we are running in the renderer, TryToPreloadFont is overridden to - // ask the browser to preload the font for us so we can access it. - TryToPreloadFont(hfont); - - // Try again. - font_ok = GetDerivedFontData(family, style_, &logfont_, &ascent, &hfont, &script_cache); - DCHECK(font_ok); - } - - // TODO(jungshik) : Currently GetDerivedHFont always returns a - // a valid HFONT, but in the future, I may change it to return 0. - DCHECK(hfont); - - // We don't need a font_properties for the last resort fallback font - // because we don't have anything more to try and are forced to - // accept empty glyph boxes. If we tried a series of fonts as - // 'last-resort fallback', we'd need it, but currently, we don't. - continue; - } else if (hr == USP_E_SCRIPT_NOT_IN_FONT) { - run.a.eScript = SCRIPT_UNDEFINED; - continue; - } else if (FAILED(hr)) { - // Error shaping. - generated_glyphs = 0; - result = false; - goto cleanup; - } - } - - // Sets Windows font data for this run to those corresponding to - // a font supporting this run. we don't need to store font_properties - // because it's not used elsewhere. - shaping.hfont_ = hfont; - shaping.script_cache_ = script_cache; - - // The ascent of a font for this run can be different from - // that of the primary font so that we need to keep track of - // the difference per run and take that into account when calling - // ScriptTextOut in |Draw|. Otherwise, different runs rendered by - // different fonts would not be aligned vertically. - shaping.ascent_offset_ = ascent_ ? ascent - ascent_ : 0; - result = true; - -cleanup: - shaping.glyphs->resize(generated_glyphs); - shaping.visattr->resize(generated_glyphs); - shaping.advance->resize(generated_glyphs); - shaping.offsets->resize(generated_glyphs); - if (temp_dc) { - SelectObject(temp_dc, old_font); - ReleaseDC(NULL, temp_dc); - } - // On failure, our logs don't mean anything, so zero those out. - if (!result) - shaping.logs->clear(); - - return result; -} - -void UniscribeState::FillShapes() { - shapes_->resize(runs_->size()); - for (size_t i = 0; i < runs_->size(); i++) { - int start_item = runs_[i].iCharPos; - int item_length = input_length_ - start_item; - if (i < runs_->size() - 1) - item_length = runs_[i + 1].iCharPos - start_item; - - int num_glyphs; - if (item_length < UNISCRIBE_STATE_STACK_CHARS) { - // We'll start our buffer sizes with the current stack space available - // in our buffers if the current input fits. As long as it - // doesn't expand past that we'll save a lot of time mallocing. - num_glyphs = UNISCRIBE_STATE_STACK_CHARS; - } else { - // When the input doesn't fit, give up with the stack since it will - // almost surely not be enough room (unless the input actually shrinks, - // which is unlikely) and just start with the length recommended by - // the Uniscribe documentation as a "usually fits" size. - num_glyphs = item_length * 3 / 2 + 16; - } - - // Convert a string to a glyph string trying the primary font, - // fonts in the fallback list and then script-specific last resort font. - Shaping& shaping = shapes_[i]; - if (!Shape(&input_[start_item], item_length, num_glyphs, runs_[i], shaping)) - continue; - - // Compute placements. Note that offsets is documented incorrectly - // and is actually an array. - - // DC that we lazily create if Uniscribe commands us to. - // (this does not happen often because script_cache is already - // updated when calling ScriptShape). - HDC temp_dc = NULL; - HGDIOBJ old_font = NULL; - HRESULT hr; - while (true) { - shaping.pre_padding = 0; - hr = ScriptPlace(temp_dc, shaping.script_cache_, &shaping.glyphs[0], - static_cast<int>(shaping.glyphs->size()), - &shaping.visattr[0], &runs_[i].a, - &shaping.advance[0], &shaping.offsets[0], - &shaping.abc); - if (hr != E_PENDING) - break; - - // Allocate the DC and run the loop again. - temp_dc = GetDC(NULL); - old_font = SelectObject(temp_dc, shaping.hfont_); - } - - if (FAILED(hr)) { - // Some error we don't know how to handle. Nuke all of our data - // since we can't deal with partially valid data later. - runs_->clear(); - shapes_->clear(); - screen_order_->clear(); - } - - if (temp_dc) { - SelectObject(temp_dc, old_font); - ReleaseDC(NULL, temp_dc); - } - } - - AdjustSpaceAdvances(); - - if (letter_spacing_ != 0 || word_spacing_ != 0) - ApplySpacing(); -} - -void UniscribeState::FillScreenOrder() { - screen_order_->resize(runs_->size()); - - // We assume that the input has only one text direction in it. - // TODO(brettw) are we sure we want to keep this restriction? - if (is_rtl_) { - for (int i = 0; i < static_cast<int>(screen_order_->size()); i++) - screen_order_[static_cast<int>(screen_order_->size()) - i - 1] = i; - } else { - for (int i = 0; i < static_cast<int>(screen_order_->size()); i++) - screen_order_[i] = i; - } -} - -void UniscribeState::AdjustSpaceAdvances() { - if (space_width_ == 0) - return; - - int space_width_without_letter_spacing = space_width_ - letter_spacing_; - - // This mostly matches what WebKit's UniscribeController::shapeAndPlaceItem. - for (size_t run = 0; run < runs_->size(); run++) { - Shaping& shaping = shapes_[run]; - - for (int i = 0; i < shaping.char_length(); i++) { - if (!TreatAsSpace(input_[runs_[run].iCharPos + i])) - continue; - - int glyph_index = shaping.logs[i]; - int current_advance = shaping.advance[glyph_index]; - // Don't give zero-width spaces a width. - if (!current_advance) - continue; - - // current_advance does not include additional letter-spacing, but - // space_width does. Here we find out how off we are from the correct - // width for the space not including letter-spacing, then just subtract - // that diff. - int diff = current_advance - space_width_without_letter_spacing; - // The shaping can consist of a run of text, so only subtract the - // difference in the width of the glyph. - shaping.advance[glyph_index] -= diff; - shaping.abc.abcB -= diff; - } - } -} - -void UniscribeState::ApplySpacing() { - for (size_t run = 0; run < runs_->size(); run++) { - Shaping& shaping = shapes_[run]; - bool is_rtl = runs_[run].a.fRTL; - - if (letter_spacing_ != 0) { - // RTL text gets padded to the left of each character. We increment the - // run's advance to make this happen. This will be balanced out by NOT - // adding additional advance to the last glyph in the run. - if (is_rtl) - shaping.pre_padding += letter_spacing_; - - // Go through all the glyphs in this run and increase the "advance" to - // account for letter spacing. We adjust letter spacing only on cluster - // boundaries. - // - // This works for most scripts, but may have problems with some indic - // scripts. This behavior is better than Firefox or IE for Hebrew. - for (int i = 0; i < shaping.glyph_length(); i++) { - if (shaping.visattr[i].fClusterStart) { - // Ick, we need to assign the extra space so that the glyph comes - // first, then is followed by the space. This is opposite for RTL. - if (is_rtl) { - if (i != shaping.glyph_length() - 1) { - // All but the last character just get the spacing applied to - // their advance. The last character doesn't get anything, - shaping.advance[i] += letter_spacing_; - shaping.abc.abcB += letter_spacing_; - } - } else { - // LTR case is easier, we just add to the advance. - shaping.advance[i] += letter_spacing_; - shaping.abc.abcB += letter_spacing_; - } - } - } - } - - // Go through all the characters to find whitespace and insert the extra - // wordspacing amount for the glyphs they correspond to. - if (word_spacing_ != 0) { - for (int i = 0; i < shaping.char_length(); i++) { - if (!TreatAsSpace(input_[runs_[run].iCharPos + i])) - continue; - - // The char in question is a word separator... - int glyph_index = shaping.logs[i]; - - // Spaces will not have a glyph in Uniscribe, it will just add - // additional advance to the character to the left of the space. The - // space's corresponding glyph will be the character following it in - // reading order. - if (is_rtl) { - // In RTL, the glyph to the left of the space is the same as the - // first glyph of the following character, so we can just increment - // it. - shaping.advance[glyph_index] += word_spacing_; - shaping.abc.abcB += word_spacing_; - } else { - // LTR is actually more complex here, we apply it to the previous - // character if there is one, otherwise we have to apply it to the - // leading space of the run. - if (glyph_index == 0) { - shaping.pre_padding += word_spacing_; - } else { - shaping.advance[glyph_index - 1] += word_spacing_; - shaping.abc.abcB += word_spacing_; - } - } - } - } // word_spacing_ != 0 - - // Loop for next run... - } -} - -// The advance is the ABC width of the run -int UniscribeState::AdvanceForItem(int item_index) const { - int accum = 0; - const Shaping& shaping = shapes_[item_index]; - - if (shaping.justify->empty()) { - // Easy case with no justification, the width is just the ABC width of t - // the run. (The ABC width is the sum of the advances). - return shaping.abc.abcA + shaping.abc.abcB + shaping.abc.abcC + - shaping.pre_padding; - } - - // With justification, we use the justified amounts instead. The - // justification array contains both the advance and the extra space - // added for justification, so is the width we want. - int justification = 0; - for (size_t i = 0; i < shaping.justify->size(); i++) - justification += shaping.justify[i]; - - return shaping.pre_padding + justification; -} - -} // namespace gfx - diff --git a/base/gfx/uniscribe.h b/base/gfx/uniscribe.h deleted file mode 100644 index 162e577..0000000 --- a/base/gfx/uniscribe.h +++ /dev/null @@ -1,366 +0,0 @@ -// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. -// -// A wrapper around Uniscribe that provides a reasonable API. - -#ifndef BASE_GFX_UNISCRIBE_H__ -#define BASE_GFX_UNISCRIBE_H__ - -#include <windows.h> -#include <usp10.h> -#include <wchar.h> -#include <map> -#include <vector> - -#include "base/stack_container.h" -#include "testing/gtest/include/gtest/gtest_prod.h" - -namespace gfx { - -#define UNISCRIBE_STATE_STACK_RUNS 8 -#define UNISCRIBE_STATE_STACK_CHARS 32 - -// This object should be safe to create & destroy frequently, as long as the -// caller preserves the script_cache when possible (this data may be slow to -// compute). -// -// This object is "kind of large" (~1K) because it reserves a lot of space for -// working with to avoid expensive heap operations. Therefore, not only should -// you not worry about creating and destroying it, you should try to not keep -// them around. -class UniscribeState { - public: - // Initializes this Uniscribe run with the text pointed to by |run| with - // |length|. The input is NOT null terminated. - // - // The is_rtl flag should be set if the input script is RTL. It is assumed - // that the caller has already divided up the input text (using ICU, for - // example) into runs of the same direction of script. This avoids - // disagreements between the caller and Uniscribe later (see FillItems). - // - // A script cache should be provided by the caller that is initialized to - // NULL. When the caller is done with the cache (it may be stored between - // runs as long as it is used consistently with the same HFONT), it should - // call ScriptFreeCache(). - UniscribeState(const wchar_t* input, - int input_length, - bool is_rtl, - HFONT hfont, - SCRIPT_CACHE* script_cache, - SCRIPT_FONTPROPERTIES* font_properties); - - virtual ~UniscribeState(); - - // Sets Uniscribe's directional override flag. False by default. - bool directional_override() const { - return directional_override_; - } - void set_directional_override(bool override) { - directional_override_ = override; - } - - // Set's Uniscribe's no-ligate override flag. False by default. - bool inhibit_ligate() const { - return inhibit_ligate_; - } - void set_inhibit_ligate(bool inhibit) { - inhibit_ligate_ = inhibit; - } - - // Set letter spacing. We will try to insert this much space between - // graphemes (one or more glyphs perceived as a single unit by ordinary users - // of a script). Positive values increase letter spacing, negative values - // decrease it. 0 by default. - int letter_spacing() const { - return letter_spacing_; - } - void set_letter_spacing(int letter_spacing) { - letter_spacing_ = letter_spacing; - } - - // Set the width of a standard space character. We use this to normalize - // space widths. Windows will make spaces after Hindi characters larger than - // other spaces. A space_width of 0 means to use the default space width. - // - // Must be set before Init() is called. - int space_width() const { - return space_width_; - } - void set_space_width(int space_width) { - space_width_ = space_width; - } - - // Set word spacing. We will try to insert this much extra space between - // each word in the input (beyond whatever whitespace character separates - // words). Positive values lead to increased letter spacing, negative values - // decrease it. 0 by default. - // - // Must be set before Init() is called. - int word_spacing() const { - return word_spacing_; - } - void set_word_spacing(int word_spacing) { - word_spacing_ = word_spacing; - } - void set_ascent(int ascent) { - ascent_ = ascent; - } - - // You must call this after setting any options but before doing any - // other calls like asking for widths or drawing. - void Init() { InitWithOptionalLengthProtection(true); } - - // Returns the total width in pixels of the text run. - int Width() const; - - // Call to justify the text, with the amount of space that should be ADDED to - // get the desired width that the column should be justified to. Normally, - // spaces are inserted, but for Arabic there will be kashidas (extra strokes) - // inserted instead. - // - // This function MUST be called AFTER Init(). - void Justify(int additional_space); - - // Computes the given character offset into a pixel offset of the beginning - // of that character. - int CharacterToX(int offset) const; - - // Converts the given pixel X position into a logical character offset into - // the run. For positions appearing before the first character, this will - // return -1. - int XToCharacter(int x) const; - - // Draws the given characters to (x, y) in the given DC. The font will be - // handled by this function, but the font color and other attributes should - // be pre-set. - // - // The y position is the upper left corner, NOT the baseline. - void Draw(HDC dc, int x, int y, int from, int to); - - // Returns the first glyph assigned to the character at the given offset. - // This function is used to retrieve glyph information when Uniscribe is - // being used to generate glyphs for non-complex, non-BMP (above U+FFFF) - // characters. These characters are not otherwise special and have no - // complex shaping rules, so we don't otherwise need Uniscribe, except - // Uniscribe is the only way to get glyphs for non-BMP characters. - // - // Returns 0 if there is no glyph for the given character. - WORD FirstGlyphForCharacter(int char_offset) const; - - protected: - // Backend for init. The flag allows the unit test to specify whether we - // should fail early for very long strings like normal, or try to pass the - // long string to Uniscribe. The latter provides a way to force failure of - // shaping. - void InitWithOptionalLengthProtection(bool length_protection); - - // Tries to preload the font when the it is not accessible. - // This is the default implementation and it does not do anything. - virtual void TryToPreloadFont(HFONT font) {} - - private: - FRIEND_TEST(UniscribeTest, TooBig); - - // An array corresponding to each item in runs_ containing information - // on each of the glyphs that were generated. Like runs_, this is in - // reading order. However, for rtl text, the characters within each - // item will be reversed. - struct Shaping { - Shaping() - : pre_padding(0), - hfont_(NULL), - script_cache_(NULL), - ascent_offset_(0) { - abc.abcA = 0; - abc.abcB = 0; - abc.abcC = 0; - } - - // Returns the number of glyphs (which will be drawn to the screen) - // in this run. - int glyph_length() const { - return static_cast<int>(glyphs->size()); - } - - // Returns the number of characters (that we started with) in this run. - int char_length() const { - return static_cast<int>(logs->size()); - } - - // Returns the advance array that should be used when measuring glyphs. - // The returned pointer will indicate an array with glyph_length() elements - // and the advance that should be used for each one. This is either the - // real advance, or the justified advances if there is one, and is the - // array we want to use for measurement. - const int* effective_advances() const { - if (advance->empty()) - return 0; - if (justify->empty()) - return &advance[0]; - return &justify[0]; - } - - // This is the advance amount of space that we have added to the beginning - // of the run. It is like the ABC's |A| advance but one that we create and - // must handle internally whenever computing with pixel offsets. - int pre_padding; - - // Glyph indices in the font used to display this item. These indices - // are in screen order. - StackVector<WORD, UNISCRIBE_STATE_STACK_CHARS> glyphs; - - // For each input character, this tells us the first glyph index it - // generated. This is the only array with size of the input chars. - // - // All offsets are from the beginning of this run. Multiple characters can - // generate one glyph, in which case there will be adjacent duplicates in - // this list. One character can also generate multiple glyphs, in which - // case there will be skipped indices in this list. - StackVector<WORD, UNISCRIBE_STATE_STACK_CHARS> logs; - - // Flags and such for each glyph. - StackVector<SCRIPT_VISATTR, UNISCRIBE_STATE_STACK_CHARS> visattr; - - // Horizontal advances for each glyph listed above, this is basically - // how wide each glyph is. - StackVector<int, UNISCRIBE_STATE_STACK_CHARS> advance; - - // This contains glyph offsets, from the nominal position of a glyph. It - // is used to adjust the positions of multiple combining characters - // around/above/below base characters in a context-sensitive manner so - // that they don't bump against each other and the base character. - StackVector<GOFFSET, UNISCRIBE_STATE_STACK_CHARS> offsets; - - // Filled by a call to Justify, this is empty for nonjustified text. - // If nonempty, this contains the array of justify characters for each - // character as returned by ScriptJustify. - // - // This is the same as the advance array, but with extra space added for - // some characters. The difference between a glyph's |justify| width and - // it's |advance| width is the extra space added. - StackVector<int, UNISCRIBE_STATE_STACK_CHARS> justify; - - // Sizing information for this run. This treats the entire run as a - // character with a preceeding advance, width, and ending advance. - // The B width is the sum of the |advance| array, and the A and C widths - // are any extra spacing applied to each end. - // - // It is unclear from the documentation what this actually means. From - // experimentation, it seems that the sum of the character advances is - // always the sum of the ABC values, and I'm not sure what you're supposed - // to do with the ABC values. - ABC abc; - - // Pointers to windows font data used to render this run. - HFONT hfont_; - SCRIPT_CACHE* script_cache_; - - // Ascent offset between the ascent of the primary font - // and that of the fallback font. The offset needs to be applied, - // when drawing a string, to align multiple runs rendered with - // different fonts. - int ascent_offset_; - }; - - // Computes the runs_ array from the text run. - void FillRuns(); - - // Computes the shapes_ array given an runs_ array already filled in. - void FillShapes(); - - // Fills in the screen_order_ array (see below). - void FillScreenOrder(); - - // Called to update the glyph positions based on the current spacing options - // that are set. - void ApplySpacing(); - - // Normalizes all advances for spaces to the same width. This keeps windows - // from making spaces after Hindi characters larger, which is then - // inconsistent with our meaure of the width since WebKit doesn't include - // spaces in text-runs sent to uniscribe unless white-space:pre. - void AdjustSpaceAdvances(); - - // Returns the total width of a single item. - int AdvanceForItem(int item_index) const; - - // Shapes a run (pointed to by |input|) using |hfont| first. - // Tries a series of fonts specified retrieved with NextWinFontData - // and finally a font covering characters in |*input|. A string pointed - // by |input| comes from ScriptItemize and is supposed to contain - // characters belonging to a single script aside from characters - // common to all scripts (e.g. space). - bool Shape(const wchar_t* input, - int item_length, - int num_glyphs, - SCRIPT_ITEM& run, - Shaping& shaping); - - // Gets Windows font data for the next best font to try in the list - // of fonts. When there's no more font available, returns false - // without touching any of out params. Need to call ResetFontIndex - // to start scanning of the font list from the beginning. - virtual bool NextWinFontData(HFONT* hfont, - SCRIPT_CACHE** script_cache, - SCRIPT_FONTPROPERTIES** font_properties, - int* ascent) { - return false; - } - - // Resets the font index to the first in the list of fonts - // to try after the primaryFont turns out not to work. With font_index - // reset, NextWinFontData scans fallback fonts from the beginning. - virtual void ResetFontIndex() {} - - // The input data for this run of Uniscribe. See the constructor. - const wchar_t* input_; - const int input_length_; - const bool is_rtl_; - - // Windows font data for the primary font : - // In a sense, logfont_ and style_ are redundant because - // hfont_ contains all the information. However, invoking GetObject, - // everytime we need the height and the style, is rather expensive so - // that we cache them. Would it be better to add getter and (virtual) - // setter for the height and the style of the primary font, instead of - // logfont_? Then, a derived class ctor can set ascent_, height_ and style_ - // if they're known. Getters for them would have to 'infer' their values from - // hfont_ ONLY when they're not set. - HFONT hfont_; - SCRIPT_CACHE* script_cache_; - SCRIPT_FONTPROPERTIES* font_properties_; - int ascent_; - LOGFONT logfont_; - int style_; - - // Options, see the getters/setters above. - bool directional_override_; - bool inhibit_ligate_; - int letter_spacing_; - int space_width_; - int word_spacing_; - int justification_width_; - - // Uniscribe breaks the text into Runs. These are one length of text that is - // in one script and one direction. This array is in reading order. - StackVector<SCRIPT_ITEM, UNISCRIBE_STATE_STACK_RUNS> runs_; - - StackVector<Shaping, UNISCRIBE_STATE_STACK_RUNS> shapes_; - - // This is a mapping between reading order and screen order for the items. - // Uniscribe's items array are in reading order. For right-to-left text, - // or mixed (although WebKit's |TextRun| should really be only one - // direction), this makes it very difficult to compute character offsets - // and positions. This list is in screen order from left to right, and - // gives the index into the |runs_| and |shapes_| arrays of each - // subsequent item. - StackVector<int, UNISCRIBE_STATE_STACK_RUNS> screen_order_; - - DISALLOW_EVIL_CONSTRUCTORS(UniscribeState); -}; - -} // namespace gfx - -#endif // BASE_GFX_UNISCRIBE_H__ - diff --git a/base/gfx/uniscribe_unittest.cc b/base/gfx/uniscribe_unittest.cc deleted file mode 100644 index bbad411..0000000 --- a/base/gfx/uniscribe_unittest.cc +++ /dev/null @@ -1,140 +0,0 @@ -// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "base/gfx/uniscribe.h" -#include "base/win_util.h" -#include "testing/gtest/include/gtest/gtest.h" - -// This must be in the gfx namespace for the friend statements in uniscribe.h -// to work. -namespace gfx { - -namespace { - -class UniscribeTest : public testing::Test { - public: - UniscribeTest() { - } - - // Returns an HFONT with the given name. The caller does not have to free - // this, it will be automatically freed at the end of the test. Returns NULL - // on failure. On success, the - HFONT MakeFont(const wchar_t* font_name, SCRIPT_CACHE** cache) { - LOGFONT lf; - memset(&lf, 0, sizeof(LOGFONT)); - lf.lfHeight = 20; - wcscpy_s(lf.lfFaceName, font_name); - - HFONT hfont = CreateFontIndirect(&lf); - if (!hfont) - return NULL; - - *cache = new SCRIPT_CACHE; - **cache = NULL; - created_fonts_.push_back(std::make_pair(hfont, *cache)); - return hfont; - } - - protected: - // Default font properties structure for tests to use. - SCRIPT_FONTPROPERTIES properties_; - - private: - virtual void SetUp() { - memset(&properties_, 0, sizeof(SCRIPT_FONTPROPERTIES)); - properties_.cBytes = sizeof(SCRIPT_FONTPROPERTIES); - properties_.wgBlank = ' '; - properties_.wgDefault = '?'; // Used when the character is not in the font. - properties_.wgInvalid = '#'; // Used for invalid characters. - } - - virtual void TearDown() { - // Free any allocated fonts. - for (size_t i = 0; i < created_fonts_.size(); i++) { - DeleteObject(created_fonts_[i].first); - ScriptFreeCache(created_fonts_[i].second); - delete created_fonts_[i].second; - } - created_fonts_.clear(); - } - - // Tracks allocated fonts so we can delete them at the end of the test. - // The script cache pointer is heap allocated and must be freed. - std::vector< std::pair<HFONT, SCRIPT_CACHE*> > created_fonts_; - - DISALLOW_EVIL_CONSTRUCTORS(UniscribeTest); -}; - -} // namespace - -// This test tests giving Uniscribe a very large buffer, which will cause a -// failure. -TEST_F(UniscribeTest, TooBig) { - // This test will only run on Windows XP. It seems Uniscribe does not have the - // internal limit on Windows 2000 that we rely on to cause this failure. - if (win_util::GetWinVersion() <= win_util::WINVERSION_2000) - return; - - // Make a large string with an e with a zillion combining accents. - std::wstring input(L"e"); - for (int i = 0; i < 100000; i++) - input.push_back(0x301); // Combining acute accent. - - SCRIPT_CACHE* script_cache; - HFONT hfont = MakeFont(L"Times New Roman", &script_cache); - ASSERT_TRUE(hfont); - - // Test a long string without the normal length protection we have. This will - // cause shaping to fail. - { - gfx::UniscribeState uniscribe(input.data(), static_cast<int>(input.size()), - false, hfont, script_cache, &properties_); - uniscribe.InitWithOptionalLengthProtection(false); - - // There should be one shaping entry, with nothing in it. - ASSERT_EQ(1, uniscribe.shapes_->size()); - EXPECT_EQ(0, uniscribe.shapes_[0].glyphs->size()); - EXPECT_EQ(0, uniscribe.shapes_[0].logs->size()); - EXPECT_EQ(0, uniscribe.shapes_[0].visattr->size()); - EXPECT_EQ(0, uniscribe.shapes_[0].advance->size()); - EXPECT_EQ(0, uniscribe.shapes_[0].offsets->size()); - EXPECT_EQ(0, uniscribe.shapes_[0].justify->size()); - EXPECT_EQ(0, uniscribe.shapes_[0].abc.abcA); - EXPECT_EQ(0, uniscribe.shapes_[0].abc.abcB); - EXPECT_EQ(0, uniscribe.shapes_[0].abc.abcC); - - // The sizes of the other stuff should match the shaping entry. - EXPECT_EQ(1, uniscribe.runs_->size()); - EXPECT_EQ(1, uniscribe.screen_order_->size()); - - // Check that the various querying functions handle the empty case properly. - EXPECT_EQ(0, uniscribe.Width()); - EXPECT_EQ(0, uniscribe.FirstGlyphForCharacter(0)); - EXPECT_EQ(0, uniscribe.FirstGlyphForCharacter(1000)); - EXPECT_EQ(0, uniscribe.XToCharacter(0)); - EXPECT_EQ(0, uniscribe.XToCharacter(1000)); - } - - // Now test the very large string and make sure it is handled properly by the - // length protection. - { - gfx::UniscribeState uniscribe(input.data(), static_cast<int>(input.size()), - false, hfont, script_cache, &properties_); - uniscribe.InitWithOptionalLengthProtection(true); - - // There should be 0 runs and shapes. - EXPECT_EQ(0, uniscribe.runs_->size()); - EXPECT_EQ(0, uniscribe.shapes_->size()); - EXPECT_EQ(0, uniscribe.screen_order_->size()); - - EXPECT_EQ(0, uniscribe.Width()); - EXPECT_EQ(0, uniscribe.FirstGlyphForCharacter(0)); - EXPECT_EQ(0, uniscribe.FirstGlyphForCharacter(1000)); - EXPECT_EQ(0, uniscribe.XToCharacter(0)); - EXPECT_EQ(0, uniscribe.XToCharacter(1000)); - } -} - -} // namespace gfx - |