1 files changed, 380 insertions, 0 deletions
diff --git a/webkit/port/platform/graphics/UniscribeHelper.h b/webkit/port/platform/graphics/UniscribeHelper.h
new file mode 100644
index 0000000..27d7961
--- /dev/null
+++ b/webkit/port/platform/graphics/UniscribeHelper.h
@@ -0,0 +1,380 @@
+// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+//
+// A wrapper around Uniscribe that provides a reasonable API.
+
+#ifndef UniscribeHelper_h
+#define UniscribeHelper_h
+
+#include <windows.h>
+#include <usp10.h>
+#include <map>
+
+#include "wtf/Vector.h"
+
+#include "testing/gtest/include/gtest/gtest_prod.h"
+#include "unicode/uchar.h"
+
+namespace WebCore {
+
+#define UNISCRIBE_HELPER_STACK_RUNS 8
+#define UNISCRIBE_HELPER_STACK_CHARS 32
+
+// This object should be safe to create & destroy frequently, as long as the
+// caller preserves the script_cache when possible (this data may be slow to
+// compute).
+//
+// This object is "kind of large" (~1K) because it reserves a lot of space for
+// working with to avoid expensive heap operations. Therefore, not only should
+// you not worry about creating and destroying it, you should try to not keep
+// them around.
+class UniscribeHelper {
+public:
+    // Initializes this Uniscribe run with the text pointed to by |run| with
+    // |length|. The input is NOT null terminated.
+    //
+    // The is_rtl flag should be set if the input script is RTL. It is assumed
+    // that the caller has already divided up the input text (using ICU, for
+    // example) into runs of the same direction of script. This avoids
+    // disagreements between the caller and Uniscribe later (see FillItems).
+    //
+    // A script cache should be provided by the caller that is initialized to
+    // NULL. When the caller is done with the cache (it may be stored between
+    // runs as long as it is used consistently with the same HFONT), it should
+    // call ScriptFreeCache().
+    UniscribeHelper(const UChar* input,
+                    int inputLength,
+                    bool isRtl,
+                    HFONT hfont,
+                    SCRIPT_CACHE* scriptCache,
+                    SCRIPT_FONTPROPERTIES* fontProperties);
+
+    virtual ~UniscribeHelper();
+
+    // Sets Uniscribe's directional override flag. False by default.
+    bool directionalOverride() const
+    {
+        return m_directionalOverride;
+    }
+    void setDirectionalOverride(bool override)
+    {
+        m_directionalOverride = override;
+    }
+
+    // Set's Uniscribe's no-ligate override flag. False by default.
+    bool inhibitLigate() const
+    {
+        return m_inhibitLigate;
+    }
+    void setInhibitLigate(bool inhibit)
+    {
+        m_inhibitLigate = inhibit;
+    }
+
+    // Set letter spacing. We will try to insert this much space between
+    // graphemes (one or more glyphs perceived as a single unit by ordinary
+    // users of a script). Positive values increase letter spacing, negative
+    // values decrease it. 0 by default.
+    int letterSpacing() const
+    {
+        return m_letterSpacing;
+    }
+    void setLetterSpacing(int letterSpacing)
+    {
+        m_letterSpacing = letterSpacing;
+    }
+
+    // Set the width of a standard space character. We use this to normalize
+    // space widths. Windows will make spaces after Hindi characters larger than
+    // other spaces. A space_width of 0 means to use the default space width.
+    //
+    // Must be set before Init() is called.
+    int spaceWidth() const
+    {
+        return m_spaceWidth;
+    }
+    void setSpaceWidth(int spaceWidth)
+    {
+        m_spaceWidth = spaceWidth;
+    }
+
+    // Set word spacing. We will try to insert this much extra space between
+    // each word in the input (beyond whatever whitespace character separates
+    // words). Positive values lead to increased letter spacing, negative values
+    // decrease it. 0 by default.
+    //
+    // Must be set before Init() is called.
+    int wordSpacing() const
+    {
+        return m_wordSpacing;
+    }
+    void setWordSpacing(int wordSpacing)
+    {
+        m_wordSpacing = wordSpacing;
+    }
+
+    void setAscent(int ascent)
+    {
+        m_ascent = ascent;
+    }
+
+    // You must call this after setting any options but before doing any
+    // other calls like asking for widths or drawing.
+    void Init()
+    {
+        InitWithOptionalLengthProtection(true);
+    }
+
+    // Returns the total width in pixels of the text run.
+    int Width() const;
+
+    // Call to justify the text, with the amount of space that should be ADDED
+    // to get the desired width that the column should be justified to.
+    // Normally, spaces are inserted, but for Arabic there will be kashidas
+    // (extra strokes) inserted instead.
+    //
+    // This function MUST be called AFTER Init().
+    void Justify(int additionalSpace);
+
+    // Computes the given character offset into a pixel offset of the beginning
+    // of that character.
+    int CharacterToX(int offset) const;
+
+    // Converts the given pixel X position into a logical character offset into
+    // the run. For positions appearing before the first character, this will
+    // return -1.
+    int XToCharacter(int x) const;
+
+    // Draws the given characters to (x, y) in the given DC. The font will be
+    // handled by this function, but the font color and other attributes should
+    // be pre-set.
+    //
+    // The y position is the upper left corner, NOT the baseline.
+    void Draw(HDC dc, int x, int y, int from, int to);
+
+    // Returns the first glyph assigned to the character at the given offset.
+    // This function is used to retrieve glyph information when Uniscribe is
+    // being used to generate glyphs for non-complex, non-BMP (above U+FFFF)
+    // characters. These characters are not otherwise special and have no
+    // complex shaping rules, so we don't otherwise need Uniscribe, except
+    // Uniscribe is the only way to get glyphs for non-BMP characters.
+    //
+    // Returns 0 if there is no glyph for the given character.
+    WORD FirstGlyphForCharacter(int charOffset) const;
+
+protected:
+    // Backend for init. The flag allows the unit test to specify whether we
+    // should fail early for very long strings like normal, or try to pass the
+    // long string to Uniscribe. The latter provides a way to force failure of
+    // shaping.
+    void InitWithOptionalLengthProtection(bool lengthProtection);
+
+    // Tries to preload the font when the it is not accessible.
+    // This is the default implementation and it does not do anything.
+    virtual void TryToPreloadFont(HFONT font) {}
+
+private:
+    FRIEND_TEST(UniscribeTest, TooBig);
+
+    // An array corresponding to each item in runs_ containing information
+    // on each of the glyphs that were generated. Like runs_, this is in
+    // reading order. However, for rtl text, the characters within each
+    // item will be reversed.
+    struct Shaping {
+        Shaping()
+            : m_prePadding(0)
+            , m_hfont(NULL)
+            , m_scriptCache(NULL)
+            , m_ascentOffset(0) {
+            m_abc.abcA = 0;
+            m_abc.abcB = 0;
+            m_abc.abcC = 0;
+        }
+
+        // Returns the number of glyphs (which will be drawn to the screen)
+        // in this run.
+        int glyphLength() const
+        {
+            return static_cast<int>(m_glyphs.size());
+        }
+
+        // Returns the number of characters (that we started with) in this run.
+        int charLength() const
+        {
+            return static_cast<int>(m_logs.size());
+        }
+
+        // Returns the advance array that should be used when measuring glyphs.
+        // The returned pointer will indicate an array with glyph_length()
+        // elements and the advance that should be used for each one. This is
+        // either the real advance, or the justified advances if there is one,
+        // and is the array we want to use for measurement.
+        const int* effectiveAdvances() const
+        {
+            if (m_advance.size() == 0)
+                return 0;
+            if (m_justify.size() == 0)
+                return &m_advance[0];
+            return &m_justify[0];
+        }
+
+        // This is the advance amount of space that we have added to the
+        // beginning of the run. It is like the ABC's |A| advance but one that
+        // we create and must handle internally whenever computing with pixel
+        // offsets.
+        int m_prePadding;
+
+        // Glyph indices in the font used to display this item. These indices
+        // are in screen order.
+        Vector<WORD, UNISCRIBE_HELPER_STACK_CHARS> m_glyphs;
+
+        // For each input character, this tells us the first glyph index it
+        // generated. This is the only array with size of the input chars.
+        //
+        // All offsets are from the beginning of this run. Multiple characters
+        // can generate one glyph, in which case there will be adjacent
+        // duplicates in this list. One character can also generate multiple
+        // glyphs, in which case there will be skipped indices in this list.
+        Vector<WORD, UNISCRIBE_HELPER_STACK_CHARS> m_logs;
+
+        // Flags and such for each glyph.
+        Vector<SCRIPT_VISATTR, UNISCRIBE_HELPER_STACK_CHARS> m_visattr;
+
+        // Horizontal advances for each glyph listed above, this is basically
+        // how wide each glyph is.
+        Vector<int, UNISCRIBE_HELPER_STACK_CHARS> m_advance;
+
+        // This contains glyph offsets, from the nominal position of a glyph.
+        // It is used to adjust the positions of multiple combining characters
+        // around/above/below base characters in a context-sensitive manner so
+        // that they don't bump against each other and the base character.
+        Vector<GOFFSET, UNISCRIBE_HELPER_STACK_CHARS> m_offsets;
+
+        // Filled by a call to Justify, this is empty for nonjustified text.
+        // If nonempty, this contains the array of justify characters for each
+        // character as returned by ScriptJustify.
+        //
+        // This is the same as the advance array, but with extra space added
+        // for some characters. The difference between a glyph's |justify|
+        // width and it's |advance| width is the extra space added.
+        Vector<int, UNISCRIBE_HELPER_STACK_CHARS> m_justify;
+
+        // Sizing information for this run. This treats the entire run as a
+        // character with a preceeding advance, width, and ending advance.  The
+        // B width is the sum of the |advance| array, and the A and C widths
+        // are any extra spacing applied to each end.
+        //
+        // It is unclear from the documentation what this actually means. From
+        // experimentation, it seems that the sum of the character advances is
+        // always the sum of the ABC values, and I'm not sure what you're
+        // supposed to do with the ABC values.
+        ABC m_abc;
+
+        // Pointers to windows font data used to render this run.
+        HFONT m_hfont;
+        SCRIPT_CACHE* m_scriptCache;
+
+        // Ascent offset between the ascent of the primary font
+        // and that of the fallback font. The offset needs to be applied,
+        // when drawing a string, to align multiple runs rendered with
+        // different fonts.
+        int m_ascentOffset;
+    };
+
+    // Computes the runs_ array from the text run.
+    void FillRuns();
+
+    // Computes the shapes_ array given an runs_ array already filled in.
+    void FillShapes();
+
+    // Fills in the screen_order_ array (see below).
+    void FillScreenOrder();
+
+    // Called to update the glyph positions based on the current spacing
+    // options that are set.
+    void ApplySpacing();
+
+    // Normalizes all advances for spaces to the same width. This keeps windows
+    // from making spaces after Hindi characters larger, which is then
+    // inconsistent with our meaure of the width since WebKit doesn't include
+    // spaces in text-runs sent to uniscribe unless white-space:pre.
+    void AdjustSpaceAdvances();
+
+    // Returns the total width of a single item.
+    int AdvanceForItem(int item_index) const;
+
+    // Shapes a run (pointed to by |input|) using |hfont| first.
+    // Tries a series of fonts specified retrieved with NextWinFontData
+    // and finally a font covering characters in |*input|. A string pointed
+    // by |input| comes from ScriptItemize and is supposed to contain
+    // characters belonging to a single script aside from characters common to
+    // all scripts (e.g. space).
+    bool Shape(const UChar* input,
+               int item_length,
+               int num_glyphs,
+               SCRIPT_ITEM& run,
+               Shaping& shaping);
+
+    // Gets Windows font data for the next best font to try in the list
+    // of fonts. When there's no more font available, returns false
+    // without touching any of out params. Need to call ResetFontIndex
+    // to start scanning of the font list from the beginning.
+    virtual bool NextWinFontData(HFONT* hfont,
+                                 SCRIPT_CACHE** script_cache,
+                                 SCRIPT_FONTPROPERTIES** font_properties,
+                                 int* ascent) {
+        return false;
+    }
+
+    // Resets the font index to the first in the list of fonts to try after the
+    // primaryFont turns out not to work. With fontIndex reset,
+    // NextWinFontData scans fallback fonts from the beginning.
+    virtual void ResetFontIndex() {}
+
+    // The input data for this run of Uniscribe. See the constructor.
+    const UChar* m_input;
+    const int m_inputLength;
+    const bool m_isRtl;
+
+    // Windows font data for the primary font. In a sense, m_logfont and m_style
+    // are redundant because m_hfont contains all the information. However,
+    // invoking GetObject, everytime we need the height and the style, is rather
+    // expensive so that we cache them. Would it be better to add getter and
+    // (virtual) setter for the height and the style of the primary font,
+    // instead of m_logfont? Then, a derived class ctor can set m_ascent,
+    // m_height and m_style if they're known. Getters for them would have to
+    // 'infer' their values from m_hfont ONLY when they're not set.
+    HFONT m_hfont;
+    SCRIPT_CACHE* m_scriptCache;
+    SCRIPT_FONTPROPERTIES* m_fontProperties;
+    int m_ascent;
+    LOGFONT m_logfont;
+    int m_style;
+
+    // Options, see the getters/setters above.
+    bool m_directionalOverride;
+    bool m_inhibitLigate;
+    int m_letterSpacing;
+    int m_spaceWidth;
+    int m_wordSpacing;
+
+    // Uniscribe breaks the text into Runs. These are one length of text that is
+    // in one script and one direction. This array is in reading order.
+    Vector<SCRIPT_ITEM, UNISCRIBE_HELPER_STACK_RUNS> m_runs;
+
+    Vector<Shaping, UNISCRIBE_HELPER_STACK_RUNS> m_shapes;
+
+    // This is a mapping between reading order and screen order for the items.
+    // Uniscribe's items array are in reading order. For right-to-left text,
+    // or mixed (although WebKit's |TextRun| should really be only one
+    // direction), this makes it very difficult to compute character offsets
+    // and positions. This list is in screen order from left to right, and
+    // gives the index into the |m_runs| and |m_shapes| arrays of each
+    // subsequent item.
+    Vector<int, UNISCRIBE_HELPER_STACK_RUNS> m_screenOrder;
+};
+
+}  // namespace WebCore
+
+#endif  // UniscribeHelper_h