// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. // Use of this source code is governed by a BSD-style license that can be // found in the LICENSE file. // // A wrapper around Uniscribe that provides a reasonable API. #ifndef BASE_GFX_UNISCRIBE_H__ #define BASE_GFX_UNISCRIBE_H__ #include #include #include #include #include #include "base/stack_container.h" #include "testing/gtest/include/gtest/gtest_prod.h" namespace gfx { #define UNISCRIBE_STATE_STACK_RUNS 8 #define UNISCRIBE_STATE_STACK_CHARS 32 // This object should be safe to create & destroy frequently, as long as the // caller preserves the script_cache when possible (this data may be slow to // compute). // // This object is "kind of large" (~1K) because it reserves a lot of space for // working with to avoid expensive heap operations. Therefore, not only should // you not worry about creating and destroying it, you should try to not keep // them around. class UniscribeState { public: // Initializes this Uniscribe run with the text pointed to by |run| with // |length|. The input is NOT null terminated. // // The is_rtl flag should be set if the input script is RTL. It is assumed // that the caller has already divided up the input text (using ICU, for // example) into runs of the same direction of script. This avoids // disagreements between the caller and Uniscribe later (see FillItems). // // A script cache should be provided by the caller that is initialized to // NULL. When the caller is done with the cache (it may be stored between // runs as long as it is used consistently with the same HFONT), it should // call ScriptFreeCache(). UniscribeState(const wchar_t* input, int input_length, bool is_rtl, HFONT hfont, SCRIPT_CACHE* script_cache, SCRIPT_FONTPROPERTIES* font_properties); virtual ~UniscribeState(); // Sets Uniscribe's directional override flag. False by default. bool directional_override() const { return directional_override_; } void set_directional_override(bool override) { directional_override_ = override; } // Set's Uniscribe's no-ligate override flag. False by default. bool inhibit_ligate() const { return inhibit_ligate_; } void set_inhibit_ligate(bool inhibit) { inhibit_ligate_ = inhibit; } // Set letter spacing. We will try to insert this much space between // graphemes (one or more glyphs perceived as a single unit by ordinary users // of a script). Positive values increase letter spacing, negative values // decrease it. 0 by default. int letter_spacing() const { return letter_spacing_; } void set_letter_spacing(int letter_spacing) { letter_spacing_ = letter_spacing; } // Set the width of a standard space character. We use this to normalize // space widths. Windows will make spaces after Hindi characters larger than // other spaces. A space_width of 0 means to use the default space width. // // Must be set before Init() is called. int space_width() const { return space_width_; } void set_space_width(int space_width) { space_width_ = space_width; } // Set word spacing. We will try to insert this much extra space between // each word in the input (beyond whatever whitespace character separates // words). Positive values lead to increased letter spacing, negative values // decrease it. 0 by default. // // Must be set before Init() is called. int word_spacing() const { return word_spacing_; } void set_word_spacing(int word_spacing) { word_spacing_ = word_spacing; } void set_ascent(int ascent) { ascent_ = ascent; } // You must call this after setting any options but before doing any // other calls like asking for widths or drawing. void Init() { InitWithOptionalLengthProtection(true); } // Returns the total width in pixels of the text run. int Width() const; // Call to justify the text, with the amount of space that should be ADDED to // get the desired width that the column should be justified to. Normally, // spaces are inserted, but for Arabic there will be kashidas (extra strokes) // inserted instead. // // This function MUST be called AFTER Init(). void Justify(int additional_space); // Computes the given character offset into a pixel offset of the beginning // of that character. int CharacterToX(int offset) const; // Converts the given pixel X position into a logical character offset into // the run. For positions appearing before the first character, this will // return -1. int XToCharacter(int x) const; // Draws the given characters to (x, y) in the given DC. The font will be // handled by this function, but the font color and other attributes should // be pre-set. // // The y position is the upper left corner, NOT the baseline. void Draw(HDC dc, int x, int y, int from, int to); // Returns the first glyph assigned to the character at the given offset. // This function is used to retrieve glyph information when Uniscribe is // being used to generate glyphs for non-complex, non-BMP (above U+FFFF) // characters. These characters are not otherwise special and have no // complex shaping rules, so we don't otherwise need Uniscribe, except // Uniscribe is the only way to get glyphs for non-BMP characters. // // Returns 0 if there is no glyph for the given character. WORD FirstGlyphForCharacter(int char_offset) const; protected: // Backend for init. The flag allows the unit test to specify whether we // should fail early for very long strings like normal, or try to pass the // long string to Uniscribe. The latter provides a way to force failure of // shaping. void InitWithOptionalLengthProtection(bool length_protection); // Tries to preload the font when the it is not accessible. // This is the default implementation and it does not do anything. virtual void TryToPreloadFont(HFONT font) {} private: FRIEND_TEST(UniscribeTest, TooBig); // An array corresponding to each item in runs_ containing information // on each of the glyphs that were generated. Like runs_, this is in // reading order. However, for rtl text, the characters within each // item will be reversed. struct Shaping { Shaping() : pre_padding(0), hfont_(NULL), script_cache_(NULL), ascent_offset_(0) { abc.abcA = 0; abc.abcB = 0; abc.abcC = 0; } // Returns the number of glyphs (which will be drawn to the screen) // in this run. int glyph_length() const { return static_cast(glyphs->size()); } // Returns the number of characters (that we started with) in this run. int char_length() const { return static_cast(logs->size()); } // Returns the advance array that should be used when measuring glyphs. // The returned pointer will indicate an array with glyph_length() elements // and the advance that should be used for each one. This is either the // real advance, or the justified advances if there is one, and is the // array we want to use for measurement. const int* effective_advances() const { if (advance->empty()) return 0; if (justify->empty()) return &advance[0]; return &justify[0]; } // This is the advance amount of space that we have added to the beginning // of the run. It is like the ABC's |A| advance but one that we create and // must handle internally whenever computing with pixel offsets. int pre_padding; // Glyph indices in the font used to display this item. These indices // are in screen order. StackVector glyphs; // For each input character, this tells us the first glyph index it // generated. This is the only array with size of the input chars. // // All offsets are from the beginning of this run. Multiple characters can // generate one glyph, in which case there will be adjacent duplicates in // this list. One character can also generate multiple glyphs, in which // case there will be skipped indices in this list. StackVector logs; // Flags and such for each glyph. StackVector visattr; // Horizontal advances for each glyph listed above, this is basically // how wide each glyph is. StackVector advance; // This contains glyph offsets, from the nominal position of a glyph. It // is used to adjust the positions of multiple combining characters // around/above/below base characters in a context-sensitive manner so // that they don't bump against each other and the base character. StackVector offsets; // Filled by a call to Justify, this is empty for nonjustified text. // If nonempty, this contains the array of justify characters for each // character as returned by ScriptJustify. // // This is the same as the advance array, but with extra space added for // some characters. The difference between a glyph's |justify| width and // it's |advance| width is the extra space added. StackVector justify; // Sizing information for this run. This treats the entire run as a // character with a preceeding advance, width, and ending advance. // The B width is the sum of the |advance| array, and the A and C widths // are any extra spacing applied to each end. // // It is unclear from the documentation what this actually means. From // experimentation, it seems that the sum of the character advances is // always the sum of the ABC values, and I'm not sure what you're supposed // to do with the ABC values. ABC abc; // Pointers to windows font data used to render this run. HFONT hfont_; SCRIPT_CACHE* script_cache_; // Ascent offset between the ascent of the primary font // and that of the fallback font. The offset needs to be applied, // when drawing a string, to align multiple runs rendered with // different fonts. int ascent_offset_; }; // Computes the runs_ array from the text run. void FillRuns(); // Computes the shapes_ array given an runs_ array already filled in. void FillShapes(); // Fills in the screen_order_ array (see below). void FillScreenOrder(); // Called to update the glyph positions based on the current spacing options // that are set. void ApplySpacing(); // Normalizes all advances for spaces to the same width. This keeps windows // from making spaces after Hindi characters larger, which is then // inconsistent with our meaure of the width since WebKit doesn't include // spaces in text-runs sent to uniscribe unless white-space:pre. void AdjustSpaceAdvances(); // Returns the total width of a single item. int AdvanceForItem(int item_index) const; // Shapes a run (pointed to by |input|) using |hfont| first. // Tries a series of fonts specified retrieved with NextWinFontData // and finally a font covering characters in |*input|. A string pointed // by |input| comes from ScriptItemize and is supposed to contain // characters belonging to a single script aside from characters // common to all scripts (e.g. space). bool Shape(const wchar_t* input, int item_length, int num_glyphs, SCRIPT_ITEM& run, Shaping& shaping); // Gets Windows font data for the next best font to try in the list // of fonts. When there's no more font available, returns false // without touching any of out params. Need to call ResetFontIndex // to start scanning of the font list from the beginning. virtual bool NextWinFontData(HFONT* hfont, SCRIPT_CACHE** script_cache, SCRIPT_FONTPROPERTIES** font_properties, int* ascent) { return false; } // Resets the font index to the first in the list of fonts // to try after the primaryFont turns out not to work. With font_index // reset, NextWinFontData scans fallback fonts from the beginning. virtual void ResetFontIndex() {} // The input data for this run of Uniscribe. See the constructor. const wchar_t* input_; const int input_length_; const bool is_rtl_; // Windows font data for the primary font : // In a sense, logfont_ and style_ are redundant because // hfont_ contains all the information. However, invoking GetObject, // everytime we need the height and the style, is rather expensive so // that we cache them. Would it be better to add getter and (virtual) // setter for the height and the style of the primary font, instead of // logfont_? Then, a derived class ctor can set ascent_, height_ and style_ // if they're known. Getters for them would have to 'infer' their values from // hfont_ ONLY when they're not set. HFONT hfont_; SCRIPT_CACHE* script_cache_; SCRIPT_FONTPROPERTIES* font_properties_; int ascent_; LOGFONT logfont_; int style_; // Options, see the getters/setters above. bool directional_override_; bool inhibit_ligate_; int letter_spacing_; int space_width_; int word_spacing_; int justification_width_; // Uniscribe breaks the text into Runs. These are one length of text that is // in one script and one direction. This array is in reading order. StackVector runs_; StackVector shapes_; // This is a mapping between reading order and screen order for the items. // Uniscribe's items array are in reading order. For right-to-left text, // or mixed (although WebKit's |TextRun| should really be only one // direction), this makes it very difficult to compute character offsets // and positions. This list is in screen order from left to right, and // gives the index into the |runs_| and |shapes_| arrays of each // subsequent item. StackVector screen_order_; DISALLOW_EVIL_CONSTRUCTORS(UniscribeState); }; } // namespace gfx #endif // BASE_GFX_UNISCRIBE_H__