Add base to the repository.

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@8 0039d316-1c4b-4281-b951-d872f2087c98
author: initial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98> 2008-07-26 21:49:38 +0000
committer: initial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98> 2008-07-26 21:49:38 +0000
commit: d7cae12696b96500c05dd2d430f6238922c20c96 (patch)
tree: ecff27b367735535b2a66477f8cd89d3c462a6c0 /base/gfx/uniscribe.h
parent: ee2815e28d408216cf94e874825b6bcf76c69083 (diff)
download: chromium_src-d7cae12696b96500c05dd2d430f6238922c20c96.zip
chromium_src-d7cae12696b96500c05dd2d430f6238922c20c96.tar.gz
chromium_src-d7cae12696b96500c05dd2d430f6238922c20c96.tar.bz2
1 files changed, 390 insertions, 0 deletions
diff --git a/base/gfx/uniscribe.h b/base/gfx/uniscribe.h
new file mode 100644
index 0000000..29f0d811
--- /dev/null
+++ b/base/gfx/uniscribe.h
@@ -0,0 +1,390 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//    * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//    * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// A wrapper around Uniscribe that provides a reasonable API.
+
+#ifndef BASE_GFX_UNISCRIBE_H__
+#define BASE_GFX_UNISCRIBE_H__
+
+#include <windows.h>
+#include <usp10.h>
+#include <wchar.h>
+#include <map>
+#include <vector>
+
+#include "base/stack_container.h"
+#include "testing/gtest/include/gtest/gtest_prod.h"
+
+namespace gfx {
+
+#define UNISCRIBE_STATE_STACK_RUNS 8
+#define UNISCRIBE_STATE_STACK_CHARS 32
+
+// This object should be safe to create & destroy frequently, as long as the
+// caller preserves the script_cache when possible (this data may be slow to
+// compute).
+//
+// This object is "kind of large" (~1K) because it reserves a lot of space for
+// working with to avoid expensive heap operations. Therefore, not only should
+// you not worry about creating and destroying it, you should try to not keep
+// them around.
+class UniscribeState {
+ public:
+  // Initializes this Uniscribe run with the text pointed to by |run| with
+  // |length|. The input is NOT null terminated.
+  //
+  // The is_rtl flag should be set if the input script is RTL. It is assumed
+  // that the caller has already divided up the input text (using ICU, for
+  // example) into runs of the same direction of script. This avoids
+  // disagreements between the caller and Uniscribe later (see FillItems).
+  //
+  // A script cache should be provided by the caller that is initialized to
+  // NULL. When the caller is done with the cache (it may be stored between
+  // runs as long as it is used consistently with the same HFONT), it should
+  // call ScriptFreeCache().
+  UniscribeState(const wchar_t* input,
+                 int input_length,
+                 bool is_rtl,
+                 HFONT hfont,
+                 SCRIPT_CACHE* script_cache,
+                 SCRIPT_FONTPROPERTIES* font_properties);
+
+  virtual ~UniscribeState();
+
+  // Sets Uniscribe's directional override flag. False by default.
+  bool directional_override() const {
+    return directional_override_;
+  }
+  void set_directional_override(bool override) {
+    directional_override_ = override;
+  }
+
+  // Set's Uniscribe's no-ligate override flag. False by default.
+  bool inhibit_ligate() const {
+    return inhibit_ligate_;
+  }
+  void set_inhibit_ligate(bool inhibit) {
+    inhibit_ligate_ = inhibit;
+  }
+
+  // Set letter spacing. We will try to insert this much space between
+  // graphemes (one or more glyphs perceived as a single unit by ordinary users
+  // of a script). Positive values increase letter spacing, negative values
+  // decrease it. 0 by default.
+  int letter_spacing() const {
+    return letter_spacing_;
+  }
+  void set_letter_spacing(int letter_spacing) {
+    letter_spacing_ = letter_spacing;
+  }
+
+  // Set the width of a standard space character. We use this to normalize
+  // space widths. Windows will make spaces after Hindi characters larger than
+  // other spaces. A space_width of 0 means to use the default space width.
+  //
+  // Must be set before Init() is called.
+  int space_width() const {
+    return space_width_;
+  }
+  void set_space_width(int space_width) {
+    space_width_ = space_width;
+  }
+
+  // Set word spacing. We will try to insert this much extra space between
+  // each word in the input (beyond whatever whitespace character separates
+  // words). Positive values lead to increased letter spacing, negative values
+  // decrease it. 0 by default.
+  //
+  // Must be set before Init() is called.
+  int word_spacing() const {
+    return word_spacing_;
+  }
+  void set_word_spacing(int word_spacing) {
+    word_spacing_ = word_spacing;
+  }
+  void set_ascent(int ascent) {
+    ascent_ = ascent;
+  }
+
+  // You must call this after setting any options but before doing any
+  // other calls like asking for widths or drawing.
+  void Init() { InitWithOptionalLengthProtection(true); }
+
+  // Returns the total width in pixels of the text run.
+  int Width() const;
+
+  // Call to justify the text, with the amount of space that should be ADDED to
+  // get the desired width that the column should be justified to. Normally,
+  // spaces are inserted, but for Arabic there will be kashidas (extra strokes)
+  // inserted instead.
+  //
+  // This function MUST be called AFTER Init().
+  void Justify(int additional_space);
+
+  // Computes the given character offset into a pixel offset of the beginning
+  // of that character.
+  int CharacterToX(int offset) const;
+
+  // Converts the given pixel X position into a logical character offset into
+  // the run. For positions appearing before the first character, this will
+  // return -1.
+  int XToCharacter(int x) const;
+
+  // Draws the given characters to (x, y) in the given DC. The font will be
+  // handled by this function, but the font color and other attributes should
+  // be pre-set.
+  //
+  // The y position is the upper left corner, NOT the baseline.
+  void Draw(HDC dc, int x, int y, int from, int to);
+
+  // Returns the first glyph assigned to the character at the given offset.
+  // This function is used to retrieve glyph information when Uniscribe is
+  // being used to generate glyphs for non-complex, non-BMP (above U+FFFF)
+  // characters. These characters are not otherwise special and have no
+  // complex shaping rules, so we don't otherwise need Uniscribe, except
+  // Uniscribe is the only way to get glyphs for non-BMP characters.
+  //
+  // Returns 0 if there is no glyph for the given character.
+  WORD FirstGlyphForCharacter(int char_offset) const;
+
+ protected:
+  // Backend for init. The flag allows the unit test to specify whether we
+  // should fail early for very long strings like normal, or try to pass the
+  // long string to Uniscribe. The latter provides a way to force failure of
+  // shaping.
+  void InitWithOptionalLengthProtection(bool length_protection);
+
+  // Tries to preload the font when the it is not accessible.
+  // This is the default implementation and it does not do anything.
+  virtual void TryToPreloadFont(HFONT font) {}
+
+ private:
+  FRIEND_TEST(UniscribeTest, TooBig);
+
+  // An array corresponding to each item in runs_ containing information
+  // on each of the glyphs that were generated. Like runs_, this is in
+  // reading order. However, for rtl text, the characters within each
+  // item will be reversed.
+  struct Shaping {
+    Shaping()
+        : pre_padding(0),
+          hfont_(NULL),
+          script_cache_(NULL),
+          ascent_offset_(0) {
+      abc.abcA = 0;
+      abc.abcB = 0;
+      abc.abcC = 0;
+    }
+
+    // Returns the number of glyphs (which will be drawn to the screen)
+    // in this run.
+    int glyph_length() const {
+      return static_cast<int>(glyphs->size());
+    }
+
+    // Returns the number of characters (that we started with) in this run.
+    int char_length() const {
+      return static_cast<int>(logs->size());
+    }
+
+    // Returns the advance array that should be used when measuring glyphs.
+    // The returned pointer will indicate an array with glyph_length() elements
+    // and the advance that should be used for each one. This is either the
+    // real advance, or the justified advances if there is one, and is the
+    // array we want to use for measurement.
+    const int* effective_advances() const {
+      if (advance->empty())
+        return 0;
+      if (justify->empty())
+        return &advance[0];
+      return &justify[0];
+    }
+
+    // This is the advance amount of space that we have added to the beginning
+    // of the run. It is like the ABC's |A| advance but one that we create and
+    // must handle internally whenever computing with pixel offsets.
+    int pre_padding;
+
+    // Glyph indices in the font used to display this item. These indices
+    // are in screen order.
+    StackVector<WORD, UNISCRIBE_STATE_STACK_CHARS> glyphs;
+
+    // For each input character, this tells us the first glyph index it
+    // generated. This is the only array with size of the input chars.
+    //
+    // All offsets are from the beginning of this run. Multiple characters can
+    // generate one glyph, in which case there will be adjacent duplicates in
+    // this list. One character can also generate multiple glyphs, in which
+    // case there will be skipped indices in this list.
+    StackVector<WORD, UNISCRIBE_STATE_STACK_CHARS> logs;
+
+    // Flags and such for each glyph.
+    StackVector<SCRIPT_VISATTR, UNISCRIBE_STATE_STACK_CHARS> visattr;
+
+    // Horizontal advances for each glyph listed above, this is basically
+    // how wide each glyph is.
+    StackVector<int, UNISCRIBE_STATE_STACK_CHARS> advance;
+
+    // This contains glyph offsets, from the nominal position of a glyph. It
+    // is used to adjust the positions of multiple combining characters
+    // around/above/below base characters in a context-sensitive manner so
+    // that they don't bump against each other and the base character.
+    StackVector<GOFFSET, UNISCRIBE_STATE_STACK_CHARS> offsets;
+
+    // Filled by a call to Justify, this is empty for nonjustified text.
+    // If nonempty, this contains the array of justify characters for each
+    // character as returned by ScriptJustify.
+    //
+    // This is the same as the advance array, but with extra space added for
+    // some characters. The difference between a glyph's |justify| width and
+    // it's |advance| width is the extra space added.
+    StackVector<int, UNISCRIBE_STATE_STACK_CHARS> justify;
+
+    // Sizing information for this run. This treats the entire run as a
+    // character with a preceeding advance, width, and ending advance.
+    // The B width is the sum of the |advance| array, and the A and C widths
+    // are any extra spacing applied to each end.
+    //
+    // It is unclear from the documentation what this actually means. From
+    // experimentation, it seems that the sum of the character advances is
+    // always the sum of the ABC values, and I'm not sure what you're supposed
+    // to do with the ABC values.
+    ABC abc;
+
+    // Pointers to windows font data used to render this run.
+    HFONT hfont_;
+    SCRIPT_CACHE* script_cache_;
+
+    // Ascent offset between the ascent of the primary font
+    // and that of the fallback font. The offset needs to be applied,
+    // when drawing a string, to align multiple runs rendered with
+    // different fonts.
+    int ascent_offset_;
+  };
+
+  // Computes the runs_ array from the text run.
+  void FillRuns();
+
+  // Computes the shapes_ array given an runs_ array already filled in.
+  void FillShapes();
+
+  // Fills in the screen_order_ array (see below).
+  void FillScreenOrder();
+
+  // Called to update the glyph positions based on the current spacing options
+  // that are set.
+  void ApplySpacing();
+
+  // Normalizes all advances for spaces to the same width. This keeps windows
+  // from making spaces after Hindi characters larger, which is then
+  // inconsistent with our meaure of the width since WebKit doesn't include
+  // spaces in text-runs sent to uniscribe unless white-space:pre.
+  void AdjustSpaceAdvances();
+
+  // Returns the total width of a single item.
+  int AdvanceForItem(int item_index) const;
+
+  // Shapes a run (pointed to by |input|) using |hfont| first.
+  // Tries a series of fonts specified retrieved with NextWinFontData
+  // and finally a font covering characters in |*input|. A string pointed
+  // by |input| comes from ScriptItemize and is supposed to contain
+  // characters belonging to a single script aside from characters
+  // common to all scripts (e.g. space).
+  bool Shape(const wchar_t* input,
+             int item_length,
+             int num_glyphs,
+             SCRIPT_ITEM& run,
+             Shaping& shaping);
+
+  // Gets Windows font data for the next best font to try in the list
+  // of fonts. When there's no more font available, returns false
+  // without touching any of out params. Need to call ResetFontIndex
+  // to start scanning of the font list from the beginning.
+  virtual bool NextWinFontData(HFONT* hfont,
+                               SCRIPT_CACHE** script_cache,
+                               SCRIPT_FONTPROPERTIES** font_properties,
+                               int* ascent) {
+    return false;
+  }
+
+  // Resets the font index to the first in the list of fonts
+  // to try after the primaryFont turns out not to work. With font_index
+  // reset, NextWinFontData scans fallback fonts from the beginning.
+  virtual void ResetFontIndex() {}
+
+  // The input data for this run of Uniscribe. See the constructor.
+  const wchar_t* input_;
+  const int input_length_;
+  const bool is_rtl_;
+
+  // Windows font data for the primary font :
+  // In a sense, logfont_ and style_ are redundant because
+  // hfont_ contains all the information. However, invoking GetObject,
+  // everytime we need the height and the style, is rather expensive so
+  // that we cache them. Would it be better to add getter and (virtual)
+  // setter for the height and the style of the primary font, instead of
+  // logfont_? Then, a derived class ctor can set ascent_, height_ and style_
+  // if they're known. Getters for them would have to 'infer' their values from
+  // hfont_ ONLY when they're not set.
+  HFONT hfont_;
+  SCRIPT_CACHE* script_cache_;
+  SCRIPT_FONTPROPERTIES* font_properties_;
+  int ascent_;
+  LOGFONT logfont_;
+  int style_;
+
+  // Options, see the getters/setters above.
+  bool directional_override_;
+  bool inhibit_ligate_;
+  int letter_spacing_;
+  int space_width_;
+  int word_spacing_;
+  int justification_width_;
+
+  // Uniscribe breaks the text into Runs. These are one length of text that is
+  // in one script and one direction. This array is in reading order.
+  StackVector<SCRIPT_ITEM, UNISCRIBE_STATE_STACK_RUNS> runs_;
+
+  StackVector<Shaping, UNISCRIBE_STATE_STACK_RUNS> shapes_;
+
+  // This is a mapping between reading order and screen order for the items.
+  // Uniscribe's items array are in reading order. For right-to-left text,
+  // or mixed (although WebKit's |TextRun| should really be only one
+  // direction), this makes it very difficult to compute character offsets
+  // and positions. This list is in screen order from left to right, and
+  // gives the index into the |runs_| and |shapes_| arrays of each
+  // subsequent item.
+  StackVector<int, UNISCRIBE_STATE_STACK_RUNS> screen_order_;
+
+  DISALLOW_EVIL_CONSTRUCTORS(UniscribeState);
+};
+
+}  // namespace gfx
+
+#endif  // BASE_GFX_UNISCRIBE_H__
author	initial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98>	2008-07-26 21:49:38 +0000
committer	initial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98>	2008-07-26 21:49:38 +0000
commit	d7cae12696b96500c05dd2d430f6238922c20c96 (patch)
tree	ecff27b367735535b2a66477f8cd89d3c462a6c0 /base/gfx/uniscribe.h
parent	ee2815e28d408216cf94e874825b6bcf76c69083 (diff)
download	chromium_src-d7cae12696b96500c05dd2d430f6238922c20c96.zip chromium_src-d7cae12696b96500c05dd2d430f6238922c20c96.tar.gz chromium_src-d7cae12696b96500c05dd2d430f6238922c20c96.tar.bz2