summaryrefslogtreecommitdiffstats
path: root/base/gfx
diff options
context:
space:
mode:
author brettw@google.com <brettw@google.com@0039d316-1c4b-4281-b951-d872f2087c98> 2008-11-17 17:30:19 +0000
committer brettw@google.com <brettw@google.com@0039d316-1c4b-4281-b951-d872f2087c98> 2008-11-17 17:30:19 +0000
commit 9b9877d2952882560e37205edf33ad460a7efa91 (patch)
tree 452db327f3b534b65d8d8260213094c5d9e6d00d /base/gfx
parent b6f2b91367ba6ed9ebb305233d4e786c1c748e45 (diff)
downloadchromium_src-9b9877d2952882560e37205edf33ad460a7efa91.zip
chromium_src-9b9877d2952882560e37205edf33ad460a7efa91.tar.gz
chromium_src-9b9877d2952882560e37205edf33ad460a7efa91.tar.bz2
Debase our Uniscribe code. This moves FontUtils and all our Uniscribe code from
base/gfx to webkit/port/platform/graphics. I fixed the indenting and naming of the moved code.
Review URL: http://codereview.chromium.org/10785
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@5561 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'base/gfx')
-rw-r--r-- base/gfx/base_gfx.scons 4
-rw-r--r-- base/gfx/font_utils.cc 336
-rw-r--r-- base/gfx/font_utils.h 87
-rw-r--r-- base/gfx/uniscribe.cc 848
-rw-r--r-- base/gfx/uniscribe.h 366
-rw-r--r-- base/gfx/uniscribe_unittest.cc 140
6 files changed, 0 insertions, 1781 deletions
diff --git a/base/gfx/base_gfx.scons b/base/gfx/base_gfx.scons
index 9892d66..748f13b 100644
--- a/base/gfx/base_gfx.scons
+++ b/base/gfx/base_gfx.scons
@@ -33,7 +33,6 @@ if env['PLATFORM'] == 'win32':
input_files = [
'convolver.cc',
- 'font_utils.cc',
'gdi_util.cc',
'image_operations.cc',
'native_theme.cc',
@@ -43,7 +42,6 @@ input_files = [
'rect.cc',
'size.cc',
'skia_utils.cc',
- 'uniscribe.cc',
'vector_canvas.cc',
'vector_device.cc',
]
@@ -52,11 +50,9 @@ if env['PLATFORM'] in ('posix', 'darwin'):
# Remove files that still need to be ported from the input_files list.
# TODO(port): delete files from this list as they get ported.
to_be_ported_files = [
- 'font_utils.cc',
'gdi_util.cc',
'native_theme.cc',
'skia_utils.cc',
- 'uniscribe.cc',
'vector_canvas.cc',
'vector_device.cc',
]
diff --git a/base/gfx/font_utils.cc b/base/gfx/font_utils.cc
deleted file mode 100644
index 23c7f3a..0000000
--- a/base/gfx/font_utils.cc
+++ /dev/null
@@ -1,336 +0,0 @@
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "base/gfx/font_utils.h"
-
-#include <limits>
-#include <map>
-
-#include "base/gfx/uniscribe.h"
-#include "base/logging.h"
-#include "base/singleton.h"
-#include "base/string_util.h"
-#include "unicode/locid.h"
-#include "unicode/uchar.h"
-
-namespace gfx {
-
-namespace {
-
-// hash_map has extra cost with no sizable gain for a small number of integer
-// key items. When the map size becomes much bigger (which will be later as
-// more scripts are added) and this turns out to be prominent in the profile, we
-// may consider switching to hash_map (or just an array if we support all the
-// scripts)
-typedef std::map<UScriptCode, const wchar_t*> ScriptToFontMap;
-
-struct ScriptToFontMapSingletonTraits
- : public DefaultSingletonTraits<ScriptToFontMap> {
- static ScriptToFontMap* New() {
- struct FontMap {
- UScriptCode script;
- const wchar_t* family;
- };
-
- const static FontMap font_map[] = {
- {USCRIPT_LATIN, L"times new roman"},
- {USCRIPT_GREEK, L"times new roman"},
- {USCRIPT_CYRILLIC, L"times new roman"},
- {USCRIPT_SIMPLIFIED_HAN, L"simsun"},
- //{USCRIPT_TRADITIONAL_HAN, L"pmingliu"},
- {USCRIPT_HIRAGANA, L"ms pgothic"},
- {USCRIPT_KATAKANA, L"ms pgothic"},
- {USCRIPT_KATAKANA_OR_HIRAGANA, L"ms pgothic"},
- {USCRIPT_HANGUL, L"gulim"},
- {USCRIPT_THAI, L"tahoma"},
- {USCRIPT_HEBREW, L"david"},
- {USCRIPT_ARABIC, L"tahoma"},
- {USCRIPT_DEVANAGARI, L"mangal"},
- {USCRIPT_BENGALI, L"vrinda"},
- {USCRIPT_GURMUKHI, L"raavi"},
- {USCRIPT_GUJARATI, L"shruti"},
- {USCRIPT_ORIYA, L"kalinga"},
- {USCRIPT_TAMIL, L"latha"},
- {USCRIPT_TELUGU, L"gautami"},
- {USCRIPT_KANNADA, L"tunga"},
- {USCRIPT_MALAYALAM, L"kartika"},
- {USCRIPT_LAO, L"dokchampa"},
- {USCRIPT_TIBETAN, L"microsoft himalaya"},
- {USCRIPT_GEORGIAN, L"sylfaen"},
- {USCRIPT_ARMENIAN, L"sylfaen"},
- {USCRIPT_ETHIOPIC, L"nyala"},
- {USCRIPT_CANADIAN_ABORIGINAL, L"euphemia"},
- {USCRIPT_CHEROKEE, L"plantagenet cherokee"},
- {USCRIPT_YI, L"microsoft yi balti"},
- {USCRIPT_SINHALA, L"iskoola pota"},
- {USCRIPT_SYRIAC, L"estrangelo edessa"},
- {USCRIPT_KHMER, L"daunpenh"},
- {USCRIPT_THAANA, L"mv boli"},
- {USCRIPT_MONGOLIAN, L"mongolian balti"},
- {USCRIPT_MYANMAR, L"padauk"},
- // For USCRIPT_COMMON, we map blocks to scripts when
- // that makes sense.
- };
-
- ScriptToFontMap* new_instance = new ScriptToFontMap;
- // We cannot recover from OOM, so there is no need to check the allocation.
- for (int i = 0; i < arraysize(font_map); ++i)
- (*new_instance)[font_map[i].script] = font_map[i].family;
-
- // Initialize the locale-dependent mapping.
- // Since Chrome synchronizes the ICU default locale with its UI locale,
- // this ICU locale tells the current UI locale of Chrome.
- Locale locale = Locale::getDefault();
- ScriptToFontMap::const_iterator iter;
- if (locale == Locale::getJapanese()) {
- iter = new_instance->find(USCRIPT_HIRAGANA);
- } else if (locale == Locale::getKorean()) {
- iter = new_instance->find(USCRIPT_HANGUL);
- } else {
- // Use Simplified Chinese font for all other locales including
- // Traditional Chinese because Simsun (SC font) has a wider
- // coverage (covering both SC and TC) than PMingLiu (TC font).
- // This also speeds up the TC version of Chrome when rendering SC pages.
- iter = new_instance->find(USCRIPT_SIMPLIFIED_HAN);
- }
- if (iter != new_instance->end())
- (*new_instance)[USCRIPT_HAN] = iter->second;
-
- return new_instance;
- }
-};
-
-Singleton<ScriptToFontMap, ScriptToFontMapSingletonTraits> script_font_map;
-
-const int kUndefinedAscent = std::numeric_limits<int>::min();
-
-// Given an HFONT, return the ascent. If GetTextMetrics fails,
-// kUndefinedAscent is returned, instead.
-int GetAscent(HFONT hfont) {
- HDC dc = GetDC(NULL);
- HGDIOBJ oldFont = SelectObject(dc, hfont);
- TEXTMETRIC tm;
- BOOL got_metrics = GetTextMetrics(dc, &tm);
- SelectObject(dc, oldFont);
- ReleaseDC(NULL, dc);
- return got_metrics ? tm.tmAscent : kUndefinedAscent;
-}
-
-struct FontData {
- FontData() : hfont(NULL), ascent(kUndefinedAscent), script_cache(NULL) {}
- HFONT hfont;
- int ascent;
- mutable SCRIPT_CACHE script_cache;
-};
-
-// Again, using hash_map does not earn us much here.
-// page_cycler_test intl2 gave us a 'better' result with map than with hash_map
-// even though they're well-within 1-sigma of each other so that the difference
-// is not significant. On the other hand, some pages in intl2 seem to
-// take longer to load with map in the 1st pass. Need to experiment further.
-typedef std::map<std::wstring, FontData*> FontDataCache;
-struct FontDataCacheSingletonTraits
- : public DefaultSingletonTraits<FontDataCache> {
- static void Delete(FontDataCache* cache) {
- FontDataCache::iterator iter = cache->begin();
- while (iter != cache->end()) {
- SCRIPT_CACHE script_cache = iter->second->script_cache;
- if (script_cache)
- ScriptFreeCache(&script_cache);
- delete iter->second;
- ++iter;
- }
- delete cache;
- }
-};
-
-} // namespace
-
-// TODO(jungshik) : this is font fallback code version 0.1
-// - Cover all the scripts
-// - Get the default font for each script/generic family from the
-// preference instead of hardcoding in the source.
-// (at least, read values from the registry for IE font settings).
-// - Support generic families (from FontDescription)
-// - If the default font for a script is not available,
-// try some more fonts known to support it. Finally, we can
-// use EnumFontFamilies or similar APIs to come up with a list of
-// fonts supporting the script and cache the result.
-// - Consider using UnicodeSet (or UnicodeMap) converted from
-// GLYPHSET (BMP) or directly read from truetype cmap tables to
-// keep track of which character is supported by which font
-// - Update script_font_cache in response to WM_FONTCHANGE
-
-const wchar_t* GetFontFamilyForScript(UScriptCode script,
- GenericFamilyType generic) {
- ScriptToFontMap::const_iterator iter = script_font_map->find(script);
- const wchar_t* family = NULL;
- if (iter != script_font_map->end()) {
- family = iter->second;
- }
- return family;
-}
-
-// TODO(jungshik)
-// - Handle 'Inherited', 'Common' and 'Unknown'
-// (see http://www.unicode.org/reports/tr24/#Usage_Model )
-// For 'Inherited' and 'Common', perhaps we need to
-// accept another parameter indicating the previous family
-// and just return it.
-// - All the characters (or characters up to the point a single
-// font can cover) need to be taken into account
-const wchar_t* GetFallbackFamily(const wchar_t *characters,
- int length,
- GenericFamilyType generic,
- UChar32 *char_checked,
- UScriptCode *script_checked) {
- DCHECK(characters && characters[0] && length > 0);
- UScriptCode script = USCRIPT_COMMON;
-
- // Characters common to all scripts (e.g. space) are sometimes at the
- // beginning of a string, so skip them to find the font needed to render it.
- // NOTE(review): && binds tighter than ||; |i < length| doesn't guard INVALID_CODE.
- int i = 0;
- UChar32 ucs4 = 0;
- while (i < length && script == USCRIPT_COMMON ||
- script == USCRIPT_INVALID_CODE) {
- U16_NEXT(characters, i, length, ucs4);
- UErrorCode err = U_ZERO_ERROR;
- script = uscript_getScript(ucs4, &err);
- // silently ignore the error
- }
-
- // hack for full width ASCII. For the full-width ASCII, use the font
- // for Han (which is locale-dependent).
- if (0xFF00 < ucs4 && ucs4 < 0xFF5F)
- script = USCRIPT_HAN;
-
- // There are a lot of characters in USCRIPT_COMMON that can be covered
- // by fonts for scripts closely related to them.
- // See http://unicode.org/cldr/utility/list-unicodeset.jsp?a=[:Script=Common:]
- // TODO(jungshik): make this more efficient with a wider coverage
- if (script == USCRIPT_COMMON || script == USCRIPT_INHERITED) {
- UBlockCode block = ublock_getCode(ucs4);
- switch (block) {
- case UBLOCK_BASIC_LATIN:
- script = USCRIPT_LATIN;
- break;
- case UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION:
- script = USCRIPT_HAN;
- break;
- case UBLOCK_HIRAGANA:
- case UBLOCK_KATAKANA:
- script = USCRIPT_HIRAGANA;
- break;
- case UBLOCK_ARABIC:
- script = USCRIPT_ARABIC;
- break;
- case UBLOCK_GREEK:
- script = USCRIPT_GREEK;
- break;
- case UBLOCK_DEVANAGARI:
- // For Danda and Double Danda (U+0964, U+0965), use a Devanagari
- // font for now although they're used by other scripts as well.
- // Without a context, we can't do any better.
- script = USCRIPT_DEVANAGARI;
- break;
- case UBLOCK_ARMENIAN:
- script = USCRIPT_ARMENIAN;
- break;
- case UBLOCK_GEORGIAN:
- script = USCRIPT_GEORGIAN;
- break;
- case UBLOCK_KANNADA:
- script = USCRIPT_KANNADA;
- break;
- }
- }
-
- // Another lame work-around to cover non-BMP characters.
- const wchar_t* family = GetFontFamilyForScript(script, generic);
- if (!family) {
- int plane = ucs4 >> 16;
- switch (plane) {
- case 1:
- family = L"code2001";
- break;
- case 2:
- family = L"simsun-extb";
- break;
- default:
- family = L"lucida sans unicode";
- }
- }
-
- if (char_checked) *char_checked = ucs4;
- if (script_checked) *script_checked = script;
- return family;
-}
-
-
-
-// Be aware that this is not thread-safe.
-bool GetDerivedFontData(const wchar_t *family,
- int style,
- LOGFONT *logfont,
- int *ascent,
- HFONT *hfont,
- SCRIPT_CACHE **script_cache) {
- DCHECK(logfont && family && *family);
- // Using |Singleton| here is not free, but the intl2 page cycler test
- // does not show any noticeable difference with and without it. Leaking
- // the contents of FontDataCache (especially SCRIPT_CACHE) at the end
- // of a renderer process may not be a good idea. We may use
- // atexit(). However, with no noticeable performance difference, |Singleton|
- // is cleaner, I believe.
- FontDataCache* font_data_cache =
- Singleton<FontDataCache, FontDataCacheSingletonTraits>::get();
- // TODO(jungshik) : This comes up pretty high in the profile so that
- // we need to measure whether using SHA256 (after coercing all the
- // fields to char*) is faster than StringPrintf.
- std::wstring font_key = StringPrintf(L"%1d:%d:%ls", style, logfont->lfHeight,
- family);
- FontDataCache::const_iterator iter = font_data_cache->find(font_key);
- FontData *derived;
- if (iter == font_data_cache->end()) {
- DCHECK(wcslen(family) < LF_FACESIZE);
- wcscpy_s(logfont->lfFaceName, LF_FACESIZE, family);
- // TODO(jungshik): CreateFontIndirect always comes up with
- // a font even if there's no font matching the name. Need to
- // check it against what we actually want (as is done in FontCacheWin.cpp)
- derived = new FontData;
- derived->hfont = CreateFontIndirect(logfont);
- // GetAscent may return kUndefinedAscent, but we still want to
- // cache it so that we won't have to call CreateFontIndirect once
- // more for HFONT next time.
- derived->ascent = GetAscent(derived->hfont);
- (*font_data_cache)[font_key] = derived;
- } else {
- derived = iter->second;
- // Last time, GetAscent failed, so only the HFONT was
- // cached. Try once more, assuming that TryToPreloadFont
- // was called by a caller between calls.
- if (kUndefinedAscent == derived->ascent)
- derived->ascent = GetAscent(derived->hfont);
- }
- *hfont = derived->hfont;
- *ascent = derived->ascent;
- *script_cache = &(derived->script_cache);
- return *ascent != kUndefinedAscent;
-}
-
-int GetStyleFromLogfont(const LOGFONT* logfont) {
- // TODO(jungshik) : consider defining UNDEFINED or INVALID for style and
- // returning it when logfont is NULL
- if (!logfont) {
- NOTREACHED();
- return FONT_STYLE_NORMAL;
- }
- return (logfont->lfItalic ? FONT_STYLE_ITALIC : FONT_STYLE_NORMAL) |
- (logfont->lfUnderline ? FONT_STYLE_UNDERLINED : FONT_STYLE_NORMAL) |
- (logfont->lfWeight >= 700 ? FONT_STYLE_BOLD : FONT_STYLE_NORMAL);
-}
-
-} // namespace gfx
-
diff --git a/base/gfx/font_utils.h b/base/gfx/font_utils.h
deleted file mode 100644
index f00db46..0000000
--- a/base/gfx/font_utils.h
+++ /dev/null
@@ -1,87 +0,0 @@
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-//
-// A collection of utilities for font handling.
-
-#ifndef BASE_GFX_FONT_UTILS_H__
-#define BASE_GFX_FONT_UTILS_H__
-
-#include <usp10.h>
-#include <wchar.h>
-#include <windows.h>
-
-#include <unicode/uscript.h>
-
-namespace gfx {
-
-// The order of family types needs to be exactly the same as
-// WebCore::FontDescription::GenericFamilyType. We may lift that restriction
-// when we make webkit_glue::WebkitGenericToChromeGenericFamily more
-// intelligent.
-enum GenericFamilyType {
- GENERIC_FAMILY_NONE = 0,
- GENERIC_FAMILY_STANDARD,
- GENERIC_FAMILY_SERIF,
- GENERIC_FAMILY_SANSSERIF,
- GENERIC_FAMILY_MONOSPACE,
- GENERIC_FAMILY_CURSIVE,
- GENERIC_FAMILY_FANTASY
-};
-
-// Return a font family that supports a script and belongs to |generic| font family.
-// It can return NULL and a caller has to implement its own fallback.
-const wchar_t* GetFontFamilyForScript(UScriptCode script,
- GenericFamilyType generic);
-
-// Return a font family that can render |characters| based on
-// what script characters belong to. When char_checked is non-NULL,
-// it's filled with the character used to determine the script.
-// When script_checked is non-NULL, the script used to determine
-// the family is returned.
-// TODO(jungshik) : This function needs a total overhaul.
-const wchar_t* GetFallbackFamily(const wchar_t* characters,
- int length,
- GenericFamilyType generic,
- UChar32 *char_checked,
- UScriptCode *script_checked);
-// Derive a new HFONT by replacing lfFaceName of LOGFONT with |family|,
-// calculate the ascent for the derived HFONT, and initialize SCRIPT_CACHE
-// in FontData.
-// |style| is only used for cache key generation. |style| is
-// bit-wise OR of BOLD(1), ITALIC(2) and UNDERLINED(4) and
-// should match what's contained in LOGFONT. It should be calculated
-// by calling GetStyleFromLogfont.
-// Returns false if the font is not accessible, in which case |*ascent|
-// is set to kUndefinedAscent.
-// Be aware that this is not thread-safe.
-// TODO(jungshik): Instead of having three out params, we'd better have one
-// (|*FontData|), but somehow it mysteriously messes up the layout for
-// certain complex script pages (e.g. hi.wikipedia.org) and also crashes
-// at the start-up if recently visited page list includes pages with complex
-// scripts in their title. Moreover, somehow the very first-pass of
-// intl2 page-cycler test is noticeably slower with one out param than
-// the current version although the subsequent 9 passes take about the
-// same time.
-bool GetDerivedFontData(const wchar_t *family,
- int style,
- LOGFONT *logfont,
- int *ascent,
- HFONT *hfont,
- SCRIPT_CACHE **script_cache);
-
-enum {
- FONT_STYLE_NORMAL = 0,
- FONT_STYLE_BOLD = 1,
- FONT_STYLE_ITALIC = 2,
- FONT_STYLE_UNDERLINED = 4
-};
-
-// Derive style (bit-wise OR of FONT_STYLE_BOLD, FONT_STYLE_UNDERLINED, and
-// FONT_STYLE_ITALIC) from LOGFONT. Returns FONT_STYLE_NORMAL if |logfont| is NULL.
-int GetStyleFromLogfont(const LOGFONT *logfont);
-
-} // namespace gfx
-
-#endif // BASE_GFX_FONT_UTILS_H__
-
diff --git a/base/gfx/uniscribe.cc b/base/gfx/uniscribe.cc
deleted file mode 100644
index bdf4154..0000000
--- a/base/gfx/uniscribe.cc
+++ /dev/null
@@ -1,848 +0,0 @@
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include <windows.h>
-
-#include "base/gfx/uniscribe.h"
-
-#include "base/gfx/font_utils.h"
-#include "base/logging.h"
-
-namespace gfx {
-
-// This function is used to see where word spacing should be applied inside
-// runs. Note that this must match Font::treatAsSpace so we all agree where
-// and how much space this is, so we don't want to do more general Unicode
-// "is this a word break" thing.
-static bool TreatAsSpace(wchar_t c) {
- return c == ' ' || c == '\t' || c == '\n' || c == 0x00A0;
-}
-
-// SCRIPT_FONTPROPERTIES contains glyph indices for default, invalid
-// and blank glyphs. Just because ScriptShape succeeds does not mean
-// that a text run is rendered correctly. Some characters may be rendered
-// with default/invalid/blank glyphs. Therefore, we need to check if the glyph
-// array returned by ScriptShape contains any of those glyphs to make
-// sure that the text run is rendered successfully.
-static bool ContainsMissingGlyphs(WORD *glyphs,
- int length,
- SCRIPT_FONTPROPERTIES* properties) {
- for (int i = 0; i < length; ++i) {
- if (glyphs[i] == properties->wgDefault ||
- (glyphs[i] == properties->wgInvalid && glyphs[i] != properties->wgBlank))
- return true;
- }
-
- return false;
-}
-
-// HFONT is the 'incarnation' of 'everything' about font, but it's an opaque
-// handle and we can't directly query it to make a new HFONT sharing
-// its characteristics (height, style, etc) except for family name.
-// This function uses GetObject to convert HFONT back to LOGFONT,
-// resets the fields of LOGFONT and calculates style to use later
-// for the creation of a font identical to HFONT other than family name.
-static void SetLogFontAndStyle(HFONT hfont, LOGFONT *logfont, int *style) {
- DCHECK(hfont && logfont);
- if (!hfont || !logfont)
- return;
-
- GetObject(hfont, sizeof(LOGFONT), logfont);
- // We reset these fields to values appropriate for CreateFontIndirect.
- // while keeping lfHeight, which is the most important value in creating
- // a new font similar to hfont.
- logfont->lfWidth = 0;
- logfont->lfEscapement = 0;
- logfont->lfOrientation = 0;
- logfont->lfCharSet = DEFAULT_CHARSET;
- logfont->lfOutPrecision = OUT_TT_ONLY_PRECIS;
- logfont->lfQuality = DEFAULT_QUALITY; // Honor user's desktop settings.
- logfont->lfPitchAndFamily = DEFAULT_PITCH | FF_DONTCARE;
- if (style)
- *style = gfx::GetStyleFromLogfont(logfont);
-}
-
-UniscribeState::UniscribeState(const wchar_t* input,
- int input_length,
- bool is_rtl,
- HFONT hfont,
- SCRIPT_CACHE* script_cache,
- SCRIPT_FONTPROPERTIES* font_properties)
- : input_(input),
- input_length_(input_length),
- is_rtl_(is_rtl),
- hfont_(hfont),
- script_cache_(script_cache),
- font_properties_(font_properties),
- directional_override_(false),
- inhibit_ligate_(false),
- letter_spacing_(0),
- space_width_(0),
- word_spacing_(0),
- ascent_(0) {
- logfont_.lfFaceName[0] = 0;
-}
-
-UniscribeState::~UniscribeState() {
-}
-
-void UniscribeState::InitWithOptionalLengthProtection(bool length_protection) {
- // We cap the input length and just don't do anything. We'll allocate a lot
- // of things of the size of the number of characters, so the allocated memory
- // will be several times the input length. Plus shaping such a large buffer
- // may be a form of denial of service. No legitimate text should be this long.
- // It also appears that Uniscribe flatly rejects very long strings, so we
- // don't lose anything by doing this.
- //
- // The input length protection may be disabled by the unit tests to cause
- // an error condition.
- static const int kMaxInputLength = 65535;
- if (input_length_ == 0 ||
- (length_protection && input_length_ > kMaxInputLength))
- return;
-
- FillRuns();
- FillShapes();
- FillScreenOrder();
-}
-
-int UniscribeState::Width() const {
- int width = 0;
- for (int item_index = 0; item_index < static_cast<int>(runs_->size());
- item_index++) {
- width += AdvanceForItem(item_index);
- }
- return width;
-}
-
-void UniscribeState::Justify(int additional_space) {
- // Count the total number of glyphs we have so we know how big to make the
- // buffers below.
- int total_glyphs = 0;
- for (size_t run = 0; run < runs_->size(); run++) {
- int run_idx = screen_order_[run];
- total_glyphs += static_cast<int>(shapes_[run_idx].glyph_length());
- }
- if (total_glyphs == 0)
- return; // Nothing to do.
-
- // We make one big buffer in screen order of all the glyphs we are drawing
- // across runs so that the justification function will adjust evenly across
- // all glyphs.
- StackVector<SCRIPT_VISATTR, 64> visattr;
- visattr->resize(total_glyphs);
- StackVector<int, 64> advances;
- advances->resize(total_glyphs);
- StackVector<int, 64> justify;
- justify->resize(total_glyphs);
-
- // Build the packed input.
- int dest_index = 0;
- for (size_t run = 0; run < runs_->size(); run++) {
- int run_idx = screen_order_[run];
- const Shaping& shaping = shapes_[run_idx];
-
- for (int i = 0; i < shaping.glyph_length(); i++, dest_index++) {
- memcpy(&visattr[dest_index], &shaping.visattr[i], sizeof(SCRIPT_VISATTR));
- advances[dest_index] = shaping.advance[i];
- }
- }
-
- // The documentation for ScriptJustify is wrong, the parameter is the space
- // to add and not the width of the column you want.
- const int min_kashida = 1; // How do we decide what this should be?
- ScriptJustify(&visattr[0], &advances[0], total_glyphs, additional_space,
- min_kashida, &justify[0]);
-
- // Now we have to unpack the justification amounts back into the runs so
- // the glyph indices match.
- int global_glyph_index = 0;
- for (size_t run = 0; run < runs_->size(); run++) {
- int run_idx = screen_order_[run];
- Shaping& shaping = shapes_[run_idx];
-
- shaping.justify->resize(shaping.glyph_length());
- for (int i = 0; i < shaping.glyph_length(); i++, global_glyph_index++)
- shaping.justify[i] = justify[global_glyph_index];
- }
-}
-
-int UniscribeState::CharacterToX(int offset) const {
- HRESULT hr;
- DCHECK(offset <= input_length_);
-
- // Our algorithm is to traverse the items in screen order from left to
- // right, adding in each item's screen width until we find the item with
- // the requested character in it.
- int width = 0;
- for (size_t screen_idx = 0; screen_idx < runs_->size(); screen_idx++) {
- // Compute the length of this run.
- int item_idx = screen_order_[screen_idx];
- const SCRIPT_ITEM& item = runs_[item_idx];
- const Shaping& shaping = shapes_[item_idx];
- int item_length = shaping.char_length();
-
- if (offset >= item.iCharPos && offset <= item.iCharPos + item_length) {
- // Character offset is in this run.
- int char_len = offset - item.iCharPos;
-
- int cur_x = 0;
- hr = ScriptCPtoX(char_len, FALSE, item_length, shaping.glyph_length(),
- &shaping.logs[0], &shaping.visattr[0],
- shaping.effective_advances(), &item.a, &cur_x);
- if (FAILED(hr))
- return 0;
-
- width += cur_x + shaping.pre_padding;
- DCHECK(width >= 0);
- return width;
- }
-
- // Move to the next item.
- width += AdvanceForItem(item_idx);
- }
- DCHECK(width >= 0);
- return width;
-}
-
-int UniscribeState::XToCharacter(int x) const {
- // We iterate in screen order until we find the item with the given pixel
- // position in it. When we find that guy, we ask Uniscribe for the
- // character index.
- HRESULT hr;
- for (size_t screen_idx = 0; screen_idx < runs_->size(); screen_idx++) {
- int item_idx = screen_order_[screen_idx];
- int advance_for_item = AdvanceForItem(item_idx);
-
- // Note that the run may be empty if shaping failed, so we want to skip
- // over it.
- const Shaping& shaping = shapes_[item_idx];
- int item_length = shaping.char_length();
- if (x <= advance_for_item && item_length > 0) {
- // The requested offset is within this item.
- const SCRIPT_ITEM& item = runs_[item_idx];
-
- // Account for the leading space we've added to this run that Uniscribe
- // doesn't know about.
- x -= shaping.pre_padding;
-
- int char_x = 0;
- int trailing;
- hr = ScriptXtoCP(x, item_length, shaping.glyph_length(),
- &shaping.logs[0], &shaping.visattr[0],
- shaping.effective_advances(), &item.a, &char_x,
- &trailing);
-
- // The character offset is within the item. We need to add the item's
- // offset to transform it into the space of the TextRun
- return char_x + item.iCharPos;
- }
-
- // The offset is beyond this item, account for its length and move on.
- x -= advance_for_item;
- }
-
- // Error condition, we don't know what to do if we don't have that X
- // position in any of our items.
- return 0;
-}
-
-void UniscribeState::Draw(HDC dc, int x, int y, int from, int to) {
- HGDIOBJ old_font = 0;
- int cur_x = x;
- bool first_run = true;
-
- for (size_t screen_idx = 0; screen_idx < runs_->size(); screen_idx++) {
- int item_idx = screen_order_[screen_idx];
- const SCRIPT_ITEM& item = runs_[item_idx];
- const Shaping& shaping = shapes_[item_idx];
-
- // Character offsets within this run. THESE MAY NOT BE IN RANGE and may
- // be negative, etc. The code below handles this.
- int from_char = from - item.iCharPos;
- int to_char = to - item.iCharPos;
-
- // See if we need to draw any characters in this item.
- if (shaping.char_length() == 0 ||
- from_char >= shaping.char_length() || to_char <= 0) {
- // No chars in this item to display.
- cur_x += AdvanceForItem(item_idx);
- continue;
- }
-
- // Compute the starting glyph within this span. |from| and |to| are
- // global offsets that may intersect arbitrarily with our local run.
- int from_glyph, after_glyph;
- if (item.a.fRTL) {
- // To compute the first glyph when going RTL, we use |to|.
- if (to_char >= shaping.char_length()) {
- // The end of the text is after (to the left) of us.
- from_glyph = 0;
- } else {
- // Since |to| is exclusive, the first character we draw on the left
- // is actually the one right before (to the right) of |to|.
- from_glyph = shaping.logs[to_char - 1];
- }
-
- // The last glyph is actually the first character in the range.
- if (from_char <= 0) {
- // The first character to draw is before (to the right) of this span,
- // so draw all the way to the end.
- after_glyph = shaping.glyph_length();
- } else {
- // We want to draw everything up until the character to the right of
- // |from|. To the right is - 1, so we look that up (remember our
- // character could be more than one glyph, so we can't look up our
- // glyph and add one).
- after_glyph = shaping.logs[from_char - 1];
- }
- } else {
- // Easy case, everybody agrees about directions. We only need to handle
- // boundary conditions to get a range inclusive at the beginning, and
- // exclusive at the ending. We have to do some computation to see the
- // glyph one past the end.
- from_glyph = shaping.logs[from_char < 0 ? 0 : from_char];
- if (to_char >= shaping.char_length())
- after_glyph = shaping.glyph_length();
- else
- after_glyph = shaping.logs[to_char];
- }
-
- // Account for the characters that were skipped in this run. When
- // WebKit asks us to draw a subset of the run, it actually tells us
- // to draw at the X offset of the beginning of the run, since it
- // doesn't know the internal position of any of our characters.
- const int* effective_advances = shaping.effective_advances();
- int inner_offset = 0;
- for (int i = 0; i < from_glyph; i++)
- inner_offset += effective_advances[i];
-
- // Actually draw the glyphs we found.
- int glyph_count = after_glyph - from_glyph;
- if (from_glyph >= 0 && glyph_count > 0) {
- // Account for the preceding space we need to add to this run. We don't
- // need to account for the following space because that will be counted
- // in AdvanceForItem below when we move to the next run.
- inner_offset += shaping.pre_padding;
-
- // Pass NULL in when there is no justification.
- const int* justify = shaping.justify->empty() ?
- NULL : &shaping.justify[from_glyph];
-
- if (first_run) {
- old_font = SelectObject(dc, shaping.hfont_);
- first_run = false;
- } else {
- SelectObject(dc, shaping.hfont_);
- }
-
- // TODO(brettw) bug 698452: if a half a character is selected,
- // we should set up a clip rect so we draw the half of the glyph
- // correctly.
- // Fonts with different ascents can be used to render different runs.
- // 'Across-runs' y-coordinate correction needs to be adjusted
- // for each font.
- HRESULT hr = S_FALSE;
- for (int executions = 0; executions < 2; ++executions) {
- hr = ScriptTextOut(dc, shaping.script_cache_, cur_x + inner_offset,
- y - shaping.ascent_offset_, 0, NULL, &item.a, NULL,
- 0, &shaping.glyphs[from_glyph],
- glyph_count, &shaping.advance[from_glyph],
- justify, &shaping.offsets[from_glyph]);
- if (S_OK != hr && 0 == executions) {
- // If this ScriptTextOut is called from the renderer it might fail
- // because the sandbox is preventing it from opening the font files.
- // If we are running in the renderer, TryToPreloadFont is overridden
- // to ask the browser to preload the font for us so we can access it.
- TryToPreloadFont(shaping.hfont_);
- continue;
- }
- break;
- }
-
- DCHECK(S_OK == hr);
-
-
- }
-
- cur_x += AdvanceForItem(item_idx);
- }
-
- if (old_font)
- SelectObject(dc, old_font);
-}
-
-WORD UniscribeState::FirstGlyphForCharacter(int char_offset) const {
- // Find the run for the given character.
- for (int i = 0; i < static_cast<int>(runs_->size()); i++) {
- int first_char = runs_[i].iCharPos;
- const Shaping& shaping = shapes_[i];
- int local_offset = char_offset - first_char;
- if (local_offset >= 0 && local_offset < shaping.char_length()) {
- // The character is in this run, return the first glyph for it (should
- // generally be the only glyph). It seems Uniscribe gives glyph 0 for
- // empty, which is what we want to return in the "missing" case.
- size_t glyph_index = shaping.logs[local_offset];
- if (glyph_index >= shaping.glyphs->size()) {
- // The glyph should be in this run, but the run has too few actual
- // characters. This can happen when shaping the run fails, in which
- // case, we should have no data in the logs at all.
- DCHECK(shaping.glyphs->empty());
- return 0;
- }
- return shaping.glyphs[glyph_index];
- }
- }
- return 0;
-}
-
-void UniscribeState::FillRuns() {
- HRESULT hr;
- runs_->resize(UNISCRIBE_STATE_STACK_RUNS);
-
- SCRIPT_STATE input_state;
- input_state.uBidiLevel = is_rtl_;
- input_state.fOverrideDirection = directional_override_;
- input_state.fInhibitSymSwap = false;
- input_state.fCharShape = false; // Not implemented in Uniscribe
- input_state.fDigitSubstitute = false; // Do we want this for Arabic?
- input_state.fInhibitLigate = inhibit_ligate_;
- input_state.fDisplayZWG = false; // Don't draw control characters.
- input_state.fArabicNumContext = is_rtl_; // Do we want this for Arabic?
- input_state.fGcpClusters = false;
- input_state.fReserved = 0;
- input_state.fEngineReserved = 0;
- // The psControl argument to ScriptItemize should be non-NULL for RTL text,
- // per http://msdn.microsoft.com/en-us/library/ms776532.aspx . So use a
- // SCRIPT_CONTROL that is set to all zeros. Zero as a locale ID means the
- // neutral locale per http://msdn.microsoft.com/en-us/library/ms776294.aspx .
- static SCRIPT_CONTROL input_control = {0, // uDefaultLanguage :16;
- 0, // fContextDigits :1;
- 0, // fInvertPreBoundDir :1;
- 0, // fInvertPostBoundDir :1;
- 0, // fLinkStringBefore :1;
- 0, // fLinkStringAfter :1;
- 0, // fNeutralOverride :1;
- 0, // fNumericOverride :1;
- 0, // fLegacyBidiClass :1;
- 0, // fMergeNeutralItems :1;
- 0};// fReserved :7;
- // Calling ScriptApplyDigitSubstitution( NULL, &input_control, &input_state)
- // here would be appropriate if we wanted to set the language ID, and get
- // local digit substitution behavior. For now, don't do it.
-
- while (true) {
- int num_items = 0;
-
- // Ideally, we would have a way to know the runs before and after this
- // one, and put them into the control parameter of ScriptItemize. This
- // would allow us to shape characters properly that cross style
- // boundaries (WebKit bug 6148).
- //
- // We tell ScriptItemize that the output list of items is one smaller
- // than it actually is. According to Mozilla bug 366643, if there is
- // not enough room in the array on pre-SP2 systems, ScriptItemize will
- // write one past the end of the buffer.
- //
- // ScriptItemize is very strange. It will often require a much larger
- // ITEM buffer internally than it will give us as output. For example,
- // it will say a 16-item buffer is not big enough, and will write
- // interesting numbers into all those items. But when we give it a 32
- // item buffer and it succeeds, it only has one item output.
- //
- // It seems to be doing at least two passes, the first where it puts a
- // lot of intermediate data into our items, and the second where it
- // collates them.
- hr = ScriptItemize(input_, input_length_,
- static_cast<int>(runs_->size()) - 1, &input_control, &input_state,
- &runs_[0], &num_items);
- if (SUCCEEDED(hr)) {
- runs_->resize(num_items);
- break;
- }
- if (hr != E_OUTOFMEMORY) {
- // Some kind of unexpected error.
- runs_->resize(0);
- break;
- }
- // There was not enough items for it to write into, expand.
- runs_->resize(runs_->size() * 2);
- }
-
- // Fix up the directions of the items so they're what WebKit thinks
- // they are. WebKit (and we assume any other caller) always knows what
- // direction it wants things to be in, and will only give us runs that are in
- // the same direction. Sometimes, Uniscibe disagrees, for example, if you
- // have embedded ASCII punctuation in an Arabic string, WebKit will
- // (correctly) know that is should still be rendered RTL, but Uniscibe might
- // think LTR is better.
- //
- // TODO(brettw) bug 747235:
- // This workaround fixes the bug but causes spacing problems in other cases.
- // WebKit sometimes gives us a big run that includes ASCII and Arabic, and
- // this forcing direction makes those cases incorrect. This seems to happen
- // during layout only, so it ends up that spacing is incorrect (because being
- // the wrong direction changes ligatures and stuff).
- //
- //for (size_t i = 0; i < runs_->size(); i++)
- // runs_[i].a.fRTL = is_rtl_;
-}
-
-
-bool UniscribeState::Shape(const wchar_t* input,
- int item_length,
- int num_glyphs,
- SCRIPT_ITEM& run,
- Shaping& shaping) {
- HFONT hfont = hfont_;
- SCRIPT_CACHE* script_cache = script_cache_;
- SCRIPT_FONTPROPERTIES* font_properties = font_properties_;
- int ascent = ascent_;
- HDC temp_dc = NULL;
- HGDIOBJ old_font = 0;
- HRESULT hr;
- bool lastFallbackTried = false;
- bool result;
-
- int generated_glyphs = 0;
-
- // In case HFONT passed in ctor cannot render this run, we have to scan
- // other fonts from the beginning of the font list.
- ResetFontIndex();
-
- // Compute shapes.
- while (true) {
- shaping.logs->resize(item_length);
- shaping.glyphs->resize(num_glyphs);
- shaping.visattr->resize(num_glyphs);
-
- // Firefox sets SCRIPT_ANALYSIS.SCRIPT_STATE.fDisplayZWG to true
- // here. Is that what we want? It will display control characters.
- hr = ScriptShape(temp_dc, script_cache, input, item_length,
- num_glyphs, &run.a,
- &shaping.glyphs[0], &shaping.logs[0],
- &shaping.visattr[0], &generated_glyphs);
- if (hr == E_PENDING) {
- // Allocate the DC.
- temp_dc = GetDC(NULL);
- old_font = SelectObject(temp_dc, hfont);
- continue;
- } else if (hr == E_OUTOFMEMORY) {
- num_glyphs *= 2;
- continue;
- } else if (SUCCEEDED(hr) &&
- (lastFallbackTried || !ContainsMissingGlyphs(&shaping.glyphs[0],
- generated_glyphs, font_properties))) {
- break;
- }
-
- // The current font can't render this run. clear DC and try
- // next font.
- if (temp_dc) {
- SelectObject(temp_dc, old_font);
- ReleaseDC(NULL, temp_dc);
- temp_dc = NULL;
- }
-
- if (NextWinFontData(&hfont, &script_cache, &font_properties, &ascent)) {
- // The primary font does not support this run. Try next font.
- // In case of web page rendering, they come from fonts specified in
- // CSS stylesheets.
- continue;
- } else if (!lastFallbackTried) {
- lastFallbackTried = true;
-
- // Generate a last fallback font based on the script of
- // a character to draw while inheriting size and styles
- // from the primary font
- if (!logfont_.lfFaceName[0])
- SetLogFontAndStyle(hfont_, &logfont_, &style_);
-
- // TODO(jungshik): generic type should come from webkit for
- // UniscribeStateTextRun (a derived class used in webkit).
- const wchar_t *family = GetFallbackFamily(input, item_length,
- GENERIC_FAMILY_STANDARD, NULL, NULL);
- bool font_ok = GetDerivedFontData(family, style_, &logfont_, &ascent, &hfont, &script_cache);
-
- if (!font_ok) {
- // If this GetDerivedFontData is called from the renderer it might fail
- // because the sandbox is preventing it from opening the font files.
- // If we are running in the renderer, TryToPreloadFont is overridden to
- // ask the browser to preload the font for us so we can access it.
- TryToPreloadFont(hfont);
-
- // Try again.
- font_ok = GetDerivedFontData(family, style_, &logfont_, &ascent, &hfont, &script_cache);
- DCHECK(font_ok);
- }
-
- // TODO(jungshik) : Currently GetDerivedHFont always returns a
- // a valid HFONT, but in the future, I may change it to return 0.
- DCHECK(hfont);
-
- // We don't need a font_properties for the last resort fallback font
- // because we don't have anything more to try and are forced to
- // accept empty glyph boxes. If we tried a series of fonts as
- // 'last-resort fallback', we'd need it, but currently, we don't.
- continue;
- } else if (hr == USP_E_SCRIPT_NOT_IN_FONT) {
- run.a.eScript = SCRIPT_UNDEFINED;
- continue;
- } else if (FAILED(hr)) {
- // Error shaping.
- generated_glyphs = 0;
- result = false;
- goto cleanup;
- }
- }
-
- // Sets Windows font data for this run to those corresponding to
- // a font supporting this run. we don't need to store font_properties
- // because it's not used elsewhere.
- shaping.hfont_ = hfont;
- shaping.script_cache_ = script_cache;
-
- // The ascent of a font for this run can be different from
- // that of the primary font so that we need to keep track of
- // the difference per run and take that into account when calling
- // ScriptTextOut in |Draw|. Otherwise, different runs rendered by
- // different fonts would not be aligned vertically.
- shaping.ascent_offset_ = ascent_ ? ascent - ascent_ : 0;
- result = true;
-
-cleanup:
- shaping.glyphs->resize(generated_glyphs);
- shaping.visattr->resize(generated_glyphs);
- shaping.advance->resize(generated_glyphs);
- shaping.offsets->resize(generated_glyphs);
- if (temp_dc) {
- SelectObject(temp_dc, old_font);
- ReleaseDC(NULL, temp_dc);
- }
- // On failure, our logs don't mean anything, so zero those out.
- if (!result)
- shaping.logs->clear();
-
- return result;
-}
-
-void UniscribeState::FillShapes() {
- shapes_->resize(runs_->size());
- for (size_t i = 0; i < runs_->size(); i++) {
- int start_item = runs_[i].iCharPos;
- int item_length = input_length_ - start_item;
- if (i < runs_->size() - 1)
- item_length = runs_[i + 1].iCharPos - start_item;
-
- int num_glyphs;
- if (item_length < UNISCRIBE_STATE_STACK_CHARS) {
- // We'll start our buffer sizes with the current stack space available
- // in our buffers if the current input fits. As long as it
- // doesn't expand past that we'll save a lot of time mallocing.
- num_glyphs = UNISCRIBE_STATE_STACK_CHARS;
- } else {
- // When the input doesn't fit, give up with the stack since it will
- // almost surely not be enough room (unless the input actually shrinks,
- // which is unlikely) and just start with the length recommended by
- // the Uniscribe documentation as a "usually fits" size.
- num_glyphs = item_length * 3 / 2 + 16;
- }
-
- // Convert a string to a glyph string trying the primary font,
- // fonts in the fallback list and then script-specific last resort font.
- Shaping& shaping = shapes_[i];
- if (!Shape(&input_[start_item], item_length, num_glyphs, runs_[i], shaping))
- continue;
-
- // Compute placements. Note that offsets is documented incorrectly
- // and is actually an array.
-
- // DC that we lazily create if Uniscribe commands us to.
- // (this does not happen often because script_cache is already
- // updated when calling ScriptShape).
- HDC temp_dc = NULL;
- HGDIOBJ old_font = NULL;
- HRESULT hr;
- while (true) {
- shaping.pre_padding = 0;
- hr = ScriptPlace(temp_dc, shaping.script_cache_, &shaping.glyphs[0],
- static_cast<int>(shaping.glyphs->size()),
- &shaping.visattr[0], &runs_[i].a,
- &shaping.advance[0], &shaping.offsets[0],
- &shaping.abc);
- if (hr != E_PENDING)
- break;
-
- // Allocate the DC and run the loop again.
- temp_dc = GetDC(NULL);
- old_font = SelectObject(temp_dc, shaping.hfont_);
- }
-
- if (FAILED(hr)) {
- // Some error we don't know how to handle. Nuke all of our data
- // since we can't deal with partially valid data later.
- runs_->clear();
- shapes_->clear();
- screen_order_->clear();
- }
-
- if (temp_dc) {
- SelectObject(temp_dc, old_font);
- ReleaseDC(NULL, temp_dc);
- }
- }
-
- AdjustSpaceAdvances();
-
- if (letter_spacing_ != 0 || word_spacing_ != 0)
- ApplySpacing();
-}
-
-void UniscribeState::FillScreenOrder() {
- screen_order_->resize(runs_->size());
-
- // We assume that the input has only one text direction in it.
- // TODO(brettw) are we sure we want to keep this restriction?
- if (is_rtl_) {
- for (int i = 0; i < static_cast<int>(screen_order_->size()); i++)
- screen_order_[static_cast<int>(screen_order_->size()) - i - 1] = i;
- } else {
- for (int i = 0; i < static_cast<int>(screen_order_->size()); i++)
- screen_order_[i] = i;
- }
-}
-
-void UniscribeState::AdjustSpaceAdvances() {
- if (space_width_ == 0)
- return;
-
- int space_width_without_letter_spacing = space_width_ - letter_spacing_;
-
- // This mostly matches what WebKit's UniscribeController::shapeAndPlaceItem.
- for (size_t run = 0; run < runs_->size(); run++) {
- Shaping& shaping = shapes_[run];
-
- for (int i = 0; i < shaping.char_length(); i++) {
- if (!TreatAsSpace(input_[runs_[run].iCharPos + i]))
- continue;
-
- int glyph_index = shaping.logs[i];
- int current_advance = shaping.advance[glyph_index];
- // Don't give zero-width spaces a width.
- if (!current_advance)
- continue;
-
- // current_advance does not include additional letter-spacing, but
- // space_width does. Here we find out how off we are from the correct
- // width for the space not including letter-spacing, then just subtract
- // that diff.
- int diff = current_advance - space_width_without_letter_spacing;
- // The shaping can consist of a run of text, so only subtract the
- // difference in the width of the glyph.
- shaping.advance[glyph_index] -= diff;
- shaping.abc.abcB -= diff;
- }
- }
-}
-
-void UniscribeState::ApplySpacing() {
- for (size_t run = 0; run < runs_->size(); run++) {
- Shaping& shaping = shapes_[run];
- bool is_rtl = runs_[run].a.fRTL;
-
- if (letter_spacing_ != 0) {
- // RTL text gets padded to the left of each character. We increment the
- // run's advance to make this happen. This will be balanced out by NOT
- // adding additional advance to the last glyph in the run.
- if (is_rtl)
- shaping.pre_padding += letter_spacing_;
-
- // Go through all the glyphs in this run and increase the "advance" to
- // account for letter spacing. We adjust letter spacing only on cluster
- // boundaries.
- //
- // This works for most scripts, but may have problems with some indic
- // scripts. This behavior is better than Firefox or IE for Hebrew.
- for (int i = 0; i < shaping.glyph_length(); i++) {
- if (shaping.visattr[i].fClusterStart) {
- // Ick, we need to assign the extra space so that the glyph comes
- // first, then is followed by the space. This is opposite for RTL.
- if (is_rtl) {
- if (i != shaping.glyph_length() - 1) {
- // All but the last character just get the spacing applied to
- // their advance. The last character doesn't get anything,
- shaping.advance[i] += letter_spacing_;
- shaping.abc.abcB += letter_spacing_;
- }
- } else {
- // LTR case is easier, we just add to the advance.
- shaping.advance[i] += letter_spacing_;
- shaping.abc.abcB += letter_spacing_;
- }
- }
- }
- }
-
- // Go through all the characters to find whitespace and insert the extra
- // wordspacing amount for the glyphs they correspond to.
- if (word_spacing_ != 0) {
- for (int i = 0; i < shaping.char_length(); i++) {
- if (!TreatAsSpace(input_[runs_[run].iCharPos + i]))
- continue;
-
- // The char in question is a word separator...
- int glyph_index = shaping.logs[i];
-
- // Spaces will not have a glyph in Uniscribe, it will just add
- // additional advance to the character to the left of the space. The
- // space's corresponding glyph will be the character following it in
- // reading order.
- if (is_rtl) {
- // In RTL, the glyph to the left of the space is the same as the
- // first glyph of the following character, so we can just increment
- // it.
- shaping.advance[glyph_index] += word_spacing_;
- shaping.abc.abcB += word_spacing_;
- } else {
- // LTR is actually more complex here, we apply it to the previous
- // character if there is one, otherwise we have to apply it to the
- // leading space of the run.
- if (glyph_index == 0) {
- shaping.pre_padding += word_spacing_;
- } else {
- shaping.advance[glyph_index - 1] += word_spacing_;
- shaping.abc.abcB += word_spacing_;
- }
- }
- }
- } // word_spacing_ != 0
-
- // Loop for next run...
- }
-}
-
-// The advance is the ABC width of the run
-int UniscribeState::AdvanceForItem(int item_index) const {
- int accum = 0;
- const Shaping& shaping = shapes_[item_index];
-
- if (shaping.justify->empty()) {
- // Easy case with no justification, the width is just the ABC width of t
- // the run. (The ABC width is the sum of the advances).
- return shaping.abc.abcA + shaping.abc.abcB + shaping.abc.abcC +
- shaping.pre_padding;
- }
-
- // With justification, we use the justified amounts instead. The
- // justification array contains both the advance and the extra space
- // added for justification, so is the width we want.
- int justification = 0;
- for (size_t i = 0; i < shaping.justify->size(); i++)
- justification += shaping.justify[i];
-
- return shaping.pre_padding + justification;
-}
-
-} // namespace gfx
-
diff --git a/base/gfx/uniscribe.h b/base/gfx/uniscribe.h
deleted file mode 100644
index 162e577..0000000
--- a/base/gfx/uniscribe.h
+++ /dev/null
@@ -1,366 +0,0 @@
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-//
-// A wrapper around Uniscribe that provides a reasonable API.
-
-#ifndef BASE_GFX_UNISCRIBE_H__
-#define BASE_GFX_UNISCRIBE_H__
-
-#include <windows.h>
-#include <usp10.h>
-#include <wchar.h>
-#include <map>
-#include <vector>
-
-#include "base/stack_container.h"
-#include "testing/gtest/include/gtest/gtest_prod.h"
-
-namespace gfx {
-
-#define UNISCRIBE_STATE_STACK_RUNS 8
-#define UNISCRIBE_STATE_STACK_CHARS 32
-
-// This object should be safe to create & destroy frequently, as long as the
-// caller preserves the script_cache when possible (this data may be slow to
-// compute).
-//
-// This object is "kind of large" (~1K) because it reserves a lot of space for
-// working with to avoid expensive heap operations. Therefore, not only should
-// you not worry about creating and destroying it, you should try to not keep
-// them around.
-class UniscribeState {
- public:
- // Initializes this Uniscribe run with the text pointed to by |run| with
- // |length|. The input is NOT null terminated.
- //
- // The is_rtl flag should be set if the input script is RTL. It is assumed
- // that the caller has already divided up the input text (using ICU, for
- // example) into runs of the same direction of script. This avoids
- // disagreements between the caller and Uniscribe later (see FillItems).
- //
- // A script cache should be provided by the caller that is initialized to
- // NULL. When the caller is done with the cache (it may be stored between
- // runs as long as it is used consistently with the same HFONT), it should
- // call ScriptFreeCache().
- UniscribeState(const wchar_t* input,
- int input_length,
- bool is_rtl,
- HFONT hfont,
- SCRIPT_CACHE* script_cache,
- SCRIPT_FONTPROPERTIES* font_properties);
-
- virtual ~UniscribeState();
-
- // Sets Uniscribe's directional override flag. False by default.
- bool directional_override() const {
- return directional_override_;
- }
- void set_directional_override(bool override) {
- directional_override_ = override;
- }
-
- // Set's Uniscribe's no-ligate override flag. False by default.
- bool inhibit_ligate() const {
- return inhibit_ligate_;
- }
- void set_inhibit_ligate(bool inhibit) {
- inhibit_ligate_ = inhibit;
- }
-
- // Set letter spacing. We will try to insert this much space between
- // graphemes (one or more glyphs perceived as a single unit by ordinary users
- // of a script). Positive values increase letter spacing, negative values
- // decrease it. 0 by default.
- int letter_spacing() const {
- return letter_spacing_;
- }
- void set_letter_spacing(int letter_spacing) {
- letter_spacing_ = letter_spacing;
- }
-
- // Set the width of a standard space character. We use this to normalize
- // space widths. Windows will make spaces after Hindi characters larger than
- // other spaces. A space_width of 0 means to use the default space width.
- //
- // Must be set before Init() is called.
- int space_width() const {
- return space_width_;
- }
- void set_space_width(int space_width) {
- space_width_ = space_width;
- }
-
- // Set word spacing. We will try to insert this much extra space between
- // each word in the input (beyond whatever whitespace character separates
- // words). Positive values lead to increased letter spacing, negative values
- // decrease it. 0 by default.
- //
- // Must be set before Init() is called.
- int word_spacing() const {
- return word_spacing_;
- }
- void set_word_spacing(int word_spacing) {
- word_spacing_ = word_spacing;
- }
- void set_ascent(int ascent) {
- ascent_ = ascent;
- }
-
- // You must call this after setting any options but before doing any
- // other calls like asking for widths or drawing.
- void Init() { InitWithOptionalLengthProtection(true); }
-
- // Returns the total width in pixels of the text run.
- int Width() const;
-
- // Call to justify the text, with the amount of space that should be ADDED to
- // get the desired width that the column should be justified to. Normally,
- // spaces are inserted, but for Arabic there will be kashidas (extra strokes)
- // inserted instead.
- //
- // This function MUST be called AFTER Init().
- void Justify(int additional_space);
-
- // Computes the given character offset into a pixel offset of the beginning
- // of that character.
- int CharacterToX(int offset) const;
-
- // Converts the given pixel X position into a logical character offset into
- // the run. For positions appearing before the first character, this will
- // return -1.
- int XToCharacter(int x) const;
-
- // Draws the given characters to (x, y) in the given DC. The font will be
- // handled by this function, but the font color and other attributes should
- // be pre-set.
- //
- // The y position is the upper left corner, NOT the baseline.
- void Draw(HDC dc, int x, int y, int from, int to);
-
- // Returns the first glyph assigned to the character at the given offset.
- // This function is used to retrieve glyph information when Uniscribe is
- // being used to generate glyphs for non-complex, non-BMP (above U+FFFF)
- // characters. These characters are not otherwise special and have no
- // complex shaping rules, so we don't otherwise need Uniscribe, except
- // Uniscribe is the only way to get glyphs for non-BMP characters.
- //
- // Returns 0 if there is no glyph for the given character.
- WORD FirstGlyphForCharacter(int char_offset) const;
-
- protected:
- // Backend for init. The flag allows the unit test to specify whether we
- // should fail early for very long strings like normal, or try to pass the
- // long string to Uniscribe. The latter provides a way to force failure of
- // shaping.
- void InitWithOptionalLengthProtection(bool length_protection);
-
- // Tries to preload the font when the it is not accessible.
- // This is the default implementation and it does not do anything.
- virtual void TryToPreloadFont(HFONT font) {}
-
- private:
- FRIEND_TEST(UniscribeTest, TooBig);
-
- // An array corresponding to each item in runs_ containing information
- // on each of the glyphs that were generated. Like runs_, this is in
- // reading order. However, for rtl text, the characters within each
- // item will be reversed.
- struct Shaping {
- Shaping()
- : pre_padding(0),
- hfont_(NULL),
- script_cache_(NULL),
- ascent_offset_(0) {
- abc.abcA = 0;
- abc.abcB = 0;
- abc.abcC = 0;
- }
-
- // Returns the number of glyphs (which will be drawn to the screen)
- // in this run.
- int glyph_length() const {
- return static_cast<int>(glyphs->size());
- }
-
- // Returns the number of characters (that we started with) in this run.
- int char_length() const {
- return static_cast<int>(logs->size());
- }
-
- // Returns the advance array that should be used when measuring glyphs.
- // The returned pointer will indicate an array with glyph_length() elements
- // and the advance that should be used for each one. This is either the
- // real advance, or the justified advances if there is one, and is the
- // array we want to use for measurement.
- const int* effective_advances() const {
- if (advance->empty())
- return 0;
- if (justify->empty())
- return &advance[0];
- return &justify[0];
- }
-
- // This is the advance amount of space that we have added to the beginning
- // of the run. It is like the ABC's |A| advance but one that we create and
- // must handle internally whenever computing with pixel offsets.
- int pre_padding;
-
- // Glyph indices in the font used to display this item. These indices
- // are in screen order.
- StackVector<WORD, UNISCRIBE_STATE_STACK_CHARS> glyphs;
-
- // For each input character, this tells us the first glyph index it
- // generated. This is the only array with size of the input chars.
- //
- // All offsets are from the beginning of this run. Multiple characters can
- // generate one glyph, in which case there will be adjacent duplicates in
- // this list. One character can also generate multiple glyphs, in which
- // case there will be skipped indices in this list.
- StackVector<WORD, UNISCRIBE_STATE_STACK_CHARS> logs;
-
- // Flags and such for each glyph.
- StackVector<SCRIPT_VISATTR, UNISCRIBE_STATE_STACK_CHARS> visattr;
-
- // Horizontal advances for each glyph listed above, this is basically
- // how wide each glyph is.
- StackVector<int, UNISCRIBE_STATE_STACK_CHARS> advance;
-
- // This contains glyph offsets, from the nominal position of a glyph. It
- // is used to adjust the positions of multiple combining characters
- // around/above/below base characters in a context-sensitive manner so
- // that they don't bump against each other and the base character.
- StackVector<GOFFSET, UNISCRIBE_STATE_STACK_CHARS> offsets;
-
- // Filled by a call to Justify, this is empty for nonjustified text.
- // If nonempty, this contains the array of justify characters for each
- // character as returned by ScriptJustify.
- //
- // This is the same as the advance array, but with extra space added for
- // some characters. The difference between a glyph's |justify| width and
- // it's |advance| width is the extra space added.
- StackVector<int, UNISCRIBE_STATE_STACK_CHARS> justify;
-
- // Sizing information for this run. This treats the entire run as a
- // character with a preceeding advance, width, and ending advance.
- // The B width is the sum of the |advance| array, and the A and C widths
- // are any extra spacing applied to each end.
- //
- // It is unclear from the documentation what this actually means. From
- // experimentation, it seems that the sum of the character advances is
- // always the sum of the ABC values, and I'm not sure what you're supposed
- // to do with the ABC values.
- ABC abc;
-
- // Pointers to windows font data used to render this run.
- HFONT hfont_;
- SCRIPT_CACHE* script_cache_;
-
- // Ascent offset between the ascent of the primary font
- // and that of the fallback font. The offset needs to be applied,
- // when drawing a string, to align multiple runs rendered with
- // different fonts.
- int ascent_offset_;
- };
-
- // Computes the runs_ array from the text run.
- void FillRuns();
-
- // Computes the shapes_ array given an runs_ array already filled in.
- void FillShapes();
-
- // Fills in the screen_order_ array (see below).
- void FillScreenOrder();
-
- // Called to update the glyph positions based on the current spacing options
- // that are set.
- void ApplySpacing();
-
- // Normalizes all advances for spaces to the same width. This keeps windows
- // from making spaces after Hindi characters larger, which is then
- // inconsistent with our meaure of the width since WebKit doesn't include
- // spaces in text-runs sent to uniscribe unless white-space:pre.
- void AdjustSpaceAdvances();
-
- // Returns the total width of a single item.
- int AdvanceForItem(int item_index) const;
-
- // Shapes a run (pointed to by |input|) using |hfont| first.
- // Tries a series of fonts specified retrieved with NextWinFontData
- // and finally a font covering characters in |*input|. A string pointed
- // by |input| comes from ScriptItemize and is supposed to contain
- // characters belonging to a single script aside from characters
- // common to all scripts (e.g. space).
- bool Shape(const wchar_t* input,
- int item_length,
- int num_glyphs,
- SCRIPT_ITEM& run,
- Shaping& shaping);
-
- // Gets Windows font data for the next best font to try in the list
- // of fonts. When there's no more font available, returns false
- // without touching any of out params. Need to call ResetFontIndex
- // to start scanning of the font list from the beginning.
- virtual bool NextWinFontData(HFONT* hfont,
- SCRIPT_CACHE** script_cache,
- SCRIPT_FONTPROPERTIES** font_properties,
- int* ascent) {
- return false;
- }
-
- // Resets the font index to the first in the list of fonts
- // to try after the primaryFont turns out not to work. With font_index
- // reset, NextWinFontData scans fallback fonts from the beginning.
- virtual void ResetFontIndex() {}
-
- // The input data for this run of Uniscribe. See the constructor.
- const wchar_t* input_;
- const int input_length_;
- const bool is_rtl_;
-
- // Windows font data for the primary font :
- // In a sense, logfont_ and style_ are redundant because
- // hfont_ contains all the information. However, invoking GetObject,
- // everytime we need the height and the style, is rather expensive so
- // that we cache them. Would it be better to add getter and (virtual)
- // setter for the height and the style of the primary font, instead of
- // logfont_? Then, a derived class ctor can set ascent_, height_ and style_
- // if they're known. Getters for them would have to 'infer' their values from
- // hfont_ ONLY when they're not set.
- HFONT hfont_;
- SCRIPT_CACHE* script_cache_;
- SCRIPT_FONTPROPERTIES* font_properties_;
- int ascent_;
- LOGFONT logfont_;
- int style_;
-
- // Options, see the getters/setters above.
- bool directional_override_;
- bool inhibit_ligate_;
- int letter_spacing_;
- int space_width_;
- int word_spacing_;
- int justification_width_;
-
- // Uniscribe breaks the text into Runs. These are one length of text that is
- // in one script and one direction. This array is in reading order.
- StackVector<SCRIPT_ITEM, UNISCRIBE_STATE_STACK_RUNS> runs_;
-
- StackVector<Shaping, UNISCRIBE_STATE_STACK_RUNS> shapes_;
-
- // This is a mapping between reading order and screen order for the items.
- // Uniscribe's items array are in reading order. For right-to-left text,
- // or mixed (although WebKit's |TextRun| should really be only one
- // direction), this makes it very difficult to compute character offsets
- // and positions. This list is in screen order from left to right, and
- // gives the index into the |runs_| and |shapes_| arrays of each
- // subsequent item.
- StackVector<int, UNISCRIBE_STATE_STACK_RUNS> screen_order_;
-
- DISALLOW_EVIL_CONSTRUCTORS(UniscribeState);
-};
-
-} // namespace gfx
-
-#endif // BASE_GFX_UNISCRIBE_H__
-
diff --git a/base/gfx/uniscribe_unittest.cc b/base/gfx/uniscribe_unittest.cc
deleted file mode 100644
index bbad411..0000000
--- a/base/gfx/uniscribe_unittest.cc
+++ /dev/null
@@ -1,140 +0,0 @@
-// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "base/gfx/uniscribe.h"
-#include "base/win_util.h"
-#include "testing/gtest/include/gtest/gtest.h"
-
-// This must be in the gfx namespace for the friend statements in uniscribe.h
-// to work.
-namespace gfx {
-
-namespace {
-
-class UniscribeTest : public testing::Test {
- public:
- UniscribeTest() {
- }
-
- // Returns an HFONT with the given name. The caller does not have to free
- // it; TearDown() deletes every font made here. Returns NULL on failure and
- // leaves |*cache| untouched. On success, |*cache| is a new, NULL-set cache.
- HFONT MakeFont(const wchar_t* font_name, SCRIPT_CACHE** cache) {
- LOGFONT lf;
- memset(&lf, 0, sizeof(LOGFONT));  // Fields not set below stay zero.
- lf.lfHeight = 20;
- wcscpy_s(lf.lfFaceName, font_name);
-
- HFONT hfont = CreateFontIndirect(&lf);
- if (!hfont)
- return NULL;
-
- *cache = new SCRIPT_CACHE;  // Heap allocated; deleted in TearDown().
- **cache = NULL;
- created_fonts_.push_back(std::make_pair(hfont, *cache));  // For TearDown().
- return hfont;
- }
-
- protected:
- // Default font properties structure for tests to use. Filled in by SetUp().
- SCRIPT_FONTPROPERTIES properties_;
-
- private:
- virtual void SetUp() {
- memset(&properties_, 0, sizeof(SCRIPT_FONTPROPERTIES));
- properties_.cBytes = sizeof(SCRIPT_FONTPROPERTIES);
- properties_.wgBlank = ' ';
- properties_.wgDefault = '?'; // Used when the character is not in the font.
- properties_.wgInvalid = '#'; // Used for invalid characters.
- }
-
- virtual void TearDown() {
- // Free every font made by MakeFont(), along with its script cache.
- for (size_t i = 0; i < created_fonts_.size(); i++) {
- DeleteObject(created_fonts_[i].first);
- ScriptFreeCache(created_fonts_[i].second);
- delete created_fonts_[i].second;  // The SCRIPT_CACHE new'ed in MakeFont().
- }
- created_fonts_.clear();
- }
-
- // Tracks allocated fonts so we can delete them at the end of the test.
- // The script cache pointer is heap allocated and must be freed.
- std::vector< std::pair<HFONT, SCRIPT_CACHE*> > created_fonts_;
-
- DISALLOW_EVIL_CONSTRUCTORS(UniscribeTest);
-};
-
-} // namespace
-
-// This test tests giving Uniscribe a very large buffer, which will cause a
-// failure.
-TEST_F(UniscribeTest, TooBig) {
- // Skip on Windows 2000 and older: it seems Uniscribe there does not have the
- // internal limit that we rely on to cause this failure.
- if (win_util::GetWinVersion() <= win_util::WINVERSION_2000)
- return;
-
- // Make a large string: an 'e' followed by 100000 combining accents.
- std::wstring input(L"e");
- for (int i = 0; i < 100000; i++)
- input.push_back(0x301); // Combining acute accent.
-
- SCRIPT_CACHE* script_cache;
- HFONT hfont = MakeFont(L"Times New Roman", &script_cache);
- ASSERT_TRUE(hfont);
-
- // Test a long string without the normal length protection we have. This will
- // cause shaping to fail.
- {
- gfx::UniscribeState uniscribe(input.data(), static_cast<int>(input.size()),
- false, hfont, script_cache, &properties_);
- uniscribe.InitWithOptionalLengthProtection(false);
-
- // There should be one shaping entry, with nothing in it.
- ASSERT_EQ(1, uniscribe.shapes_->size());
- EXPECT_EQ(0, uniscribe.shapes_[0].glyphs->size());
- EXPECT_EQ(0, uniscribe.shapes_[0].logs->size());
- EXPECT_EQ(0, uniscribe.shapes_[0].visattr->size());
- EXPECT_EQ(0, uniscribe.shapes_[0].advance->size());
- EXPECT_EQ(0, uniscribe.shapes_[0].offsets->size());
- EXPECT_EQ(0, uniscribe.shapes_[0].justify->size());
- EXPECT_EQ(0, uniscribe.shapes_[0].abc.abcA);
- EXPECT_EQ(0, uniscribe.shapes_[0].abc.abcB);
- EXPECT_EQ(0, uniscribe.shapes_[0].abc.abcC);
-
- // The run and screen-order arrays should have one entry each, like shapes_.
- EXPECT_EQ(1, uniscribe.runs_->size());
- EXPECT_EQ(1, uniscribe.screen_order_->size());
-
- // Check that the various querying functions handle the empty case properly.
- EXPECT_EQ(0, uniscribe.Width());
- EXPECT_EQ(0, uniscribe.FirstGlyphForCharacter(0));
- EXPECT_EQ(0, uniscribe.FirstGlyphForCharacter(1000));
- EXPECT_EQ(0, uniscribe.XToCharacter(0));
- EXPECT_EQ(0, uniscribe.XToCharacter(1000));
- }
-
- // Now test the very large string and make sure it is handled properly by the
- // length protection.
- {
- gfx::UniscribeState uniscribe(input.data(), static_cast<int>(input.size()),
- false, hfont, script_cache, &properties_);
- uniscribe.InitWithOptionalLengthProtection(true);
-
- // There should be 0 runs and shapes.
- EXPECT_EQ(0, uniscribe.runs_->size());
- EXPECT_EQ(0, uniscribe.shapes_->size());
- EXPECT_EQ(0, uniscribe.screen_order_->size());
-
- // The querying functions should all report zero here as well.
- EXPECT_EQ(0, uniscribe.Width());
- EXPECT_EQ(0, uniscribe.FirstGlyphForCharacter(0));
- EXPECT_EQ(0, uniscribe.FirstGlyphForCharacter(1000));
- EXPECT_EQ(0, uniscribe.XToCharacter(0));
- EXPECT_EQ(0, uniscribe.XToCharacter(1000));
- }
-}
-
-} // namespace gfx
-