// Copyright 2008, Google Inc.
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//    * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//    * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//    * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
//
// A wrapper around Uniscribe that provides a reasonable API.

#ifndef BASE_GFX_UNISCRIBE_H__
#define BASE_GFX_UNISCRIBE_H__

#include <windows.h>
#include <usp10.h>
#include <wchar.h>
#include <map>
#include <vector>

#include "base/stack_container.h"
#include "testing/gtest/include/gtest/gtest_prod.h"

namespace gfx {

#define UNISCRIBE_STATE_STACK_RUNS 8
#define UNISCRIBE_STATE_STACK_CHARS 32

// This object should be safe to create & destroy frequently, as long as the
// caller preserves the script_cache when possible (this data may be slow to
// compute).
//
// This object is "kind of large" (~1K) because it reserves a lot of space for
// working with to avoid expensive heap operations. Therefore, not only should
// you not worry about creating and destroying it, you should try to not keep
// them around.
class UniscribeState {
 public:
  // Initializes this Uniscribe run with the text pointed to by |run| with
  // |length|. The input is NOT null terminated.
  //
  // The is_rtl flag should be set if the input script is RTL. It is assumed
  // that the caller has already divided up the input text (using ICU, for
  // example) into runs of the same direction of script. This avoids
  // disagreements between the caller and Uniscribe later (see FillItems).
  //
  // A script cache should be provided by the caller that is initialized to
  // NULL. When the caller is done with the cache (it may be stored between
  // runs as long as it is used consistently with the same HFONT), it should
  // call ScriptFreeCache().
  UniscribeState(const wchar_t* input,
                 int input_length,
                 bool is_rtl,
                 HFONT hfont,
                 SCRIPT_CACHE* script_cache,
                 SCRIPT_FONTPROPERTIES* font_properties);

  virtual ~UniscribeState();

  // Sets Uniscribe's directional override flag. False by default.
  bool directional_override() const {
    return directional_override_;
  }
  void set_directional_override(bool override) {
    directional_override_ = override;
  }

  // Set's Uniscribe's no-ligate override flag. False by default.
  bool inhibit_ligate() const {
    return inhibit_ligate_;
  }
  void set_inhibit_ligate(bool inhibit) {
    inhibit_ligate_ = inhibit;
  }

  // Set letter spacing. We will try to insert this much space between
  // graphemes (one or more glyphs perceived as a single unit by ordinary users
  // of a script). Positive values increase letter spacing, negative values
  // decrease it. 0 by default.
  int letter_spacing() const {
    return letter_spacing_;
  }
  void set_letter_spacing(int letter_spacing) {
    letter_spacing_ = letter_spacing;
  }

  // Set the width of a standard space character. We use this to normalize
  // space widths. Windows will make spaces after Hindi characters larger than
  // other spaces. A space_width of 0 means to use the default space width.
  //
  // Must be set before Init() is called.
  int space_width() const {
    return space_width_;
  }
  void set_space_width(int space_width) {
    space_width_ = space_width;
  }

  // Set word spacing. We will try to insert this much extra space between
  // each word in the input (beyond whatever whitespace character separates
  // words). Positive values lead to increased letter spacing, negative values
  // decrease it. 0 by default.
  //
  // Must be set before Init() is called.
  int word_spacing() const {
    return word_spacing_;
  }
  void set_word_spacing(int word_spacing) {
    word_spacing_ = word_spacing;
  }
  void set_ascent(int ascent) {
    ascent_ = ascent;
  }

  // You must call this after setting any options but before doing any
  // other calls like asking for widths or drawing.
  void Init() { InitWithOptionalLengthProtection(true); }

  // Returns the total width in pixels of the text run.
  int Width() const;

  // Call to justify the text, with the amount of space that should be ADDED to
  // get the desired width that the column should be justified to. Normally,
  // spaces are inserted, but for Arabic there will be kashidas (extra strokes)
  // inserted instead.
  //
  // This function MUST be called AFTER Init().
  void Justify(int additional_space);

  // Computes the given character offset into a pixel offset of the beginning
  // of that character.
  int CharacterToX(int offset) const;

  // Converts the given pixel X position into a logical character offset into
  // the run. For positions appearing before the first character, this will
  // return -1.
  int XToCharacter(int x) const;

  // Draws the given characters to (x, y) in the given DC. The font will be
  // handled by this function, but the font color and other attributes should
  // be pre-set.
  //
  // The y position is the upper left corner, NOT the baseline.
  void Draw(HDC dc, int x, int y, int from, int to);

  // Returns the first glyph assigned to the character at the given offset.
  // This function is used to retrieve glyph information when Uniscribe is
  // being used to generate glyphs for non-complex, non-BMP (above U+FFFF)
  // characters. These characters are not otherwise special and have no
  // complex shaping rules, so we don't otherwise need Uniscribe, except
  // Uniscribe is the only way to get glyphs for non-BMP characters.
  //
  // Returns 0 if there is no glyph for the given character.
  WORD FirstGlyphForCharacter(int char_offset) const;

 protected:
  // Backend for init. The flag allows the unit test to specify whether we
  // should fail early for very long strings like normal, or try to pass the
  // long string to Uniscribe. The latter provides a way to force failure of
  // shaping.
  void InitWithOptionalLengthProtection(bool length_protection);

  // Tries to preload the font when the it is not accessible.
  // This is the default implementation and it does not do anything.
  virtual void TryToPreloadFont(HFONT font) {}

 private:
  FRIEND_TEST(UniscribeTest, TooBig);

  // An array corresponding to each item in runs_ containing information
  // on each of the glyphs that were generated. Like runs_, this is in
  // reading order. However, for rtl text, the characters within each
  // item will be reversed.
  struct Shaping {
    Shaping()
        : pre_padding(0),
          hfont_(NULL),
          script_cache_(NULL),
          ascent_offset_(0) {
      abc.abcA = 0;
      abc.abcB = 0;
      abc.abcC = 0;
    }

    // Returns the number of glyphs (which will be drawn to the screen)
    // in this run.
    int glyph_length() const {
      return static_cast<int>(glyphs->size());
    }

    // Returns the number of characters (that we started with) in this run.
    int char_length() const {
      return static_cast<int>(logs->size());
    }

    // Returns the advance array that should be used when measuring glyphs.
    // The returned pointer will indicate an array with glyph_length() elements
    // and the advance that should be used for each one. This is either the
    // real advance, or the justified advances if there is one, and is the
    // array we want to use for measurement.
    const int* effective_advances() const {
      if (advance->empty())
        return 0;
      if (justify->empty())
        return &advance[0];
      return &justify[0];
    }

    // This is the advance amount of space that we have added to the beginning
    // of the run. It is like the ABC's |A| advance but one that we create and
    // must handle internally whenever computing with pixel offsets.
    int pre_padding;

    // Glyph indices in the font used to display this item. These indices
    // are in screen order.
    StackVector<WORD, UNISCRIBE_STATE_STACK_CHARS> glyphs;

    // For each input character, this tells us the first glyph index it
    // generated. This is the only array with size of the input chars.
    //
    // All offsets are from the beginning of this run. Multiple characters can
    // generate one glyph, in which case there will be adjacent duplicates in
    // this list. One character can also generate multiple glyphs, in which
    // case there will be skipped indices in this list.
    StackVector<WORD, UNISCRIBE_STATE_STACK_CHARS> logs;

    // Flags and such for each glyph.
    StackVector<SCRIPT_VISATTR, UNISCRIBE_STATE_STACK_CHARS> visattr;

    // Horizontal advances for each glyph listed above, this is basically
    // how wide each glyph is.
    StackVector<int, UNISCRIBE_STATE_STACK_CHARS> advance;

    // This contains glyph offsets, from the nominal position of a glyph. It
    // is used to adjust the positions of multiple combining characters
    // around/above/below base characters in a context-sensitive manner so
    // that they don't bump against each other and the base character.
    StackVector<GOFFSET, UNISCRIBE_STATE_STACK_CHARS> offsets;

    // Filled by a call to Justify, this is empty for nonjustified text.
    // If nonempty, this contains the array of justify characters for each
    // character as returned by ScriptJustify.
    //
    // This is the same as the advance array, but with extra space added for
    // some characters. The difference between a glyph's |justify| width and
    // it's |advance| width is the extra space added.
    StackVector<int, UNISCRIBE_STATE_STACK_CHARS> justify;

    // Sizing information for this run. This treats the entire run as a
    // character with a preceeding advance, width, and ending advance.
    // The B width is the sum of the |advance| array, and the A and C widths
    // are any extra spacing applied to each end.
    //
    // It is unclear from the documentation what this actually means. From
    // experimentation, it seems that the sum of the character advances is
    // always the sum of the ABC values, and I'm not sure what you're supposed
    // to do with the ABC values.
    ABC abc;

    // Pointers to windows font data used to render this run.
    HFONT hfont_;
    SCRIPT_CACHE* script_cache_;

    // Ascent offset between the ascent of the primary font
    // and that of the fallback font. The offset needs to be applied,
    // when drawing a string, to align multiple runs rendered with
    // different fonts.
    int ascent_offset_;
  };

  // Computes the runs_ array from the text run.
  void FillRuns();

  // Computes the shapes_ array given an runs_ array already filled in.
  void FillShapes();

  // Fills in the screen_order_ array (see below).
  void FillScreenOrder();

  // Called to update the glyph positions based on the current spacing options
  // that are set.
  void ApplySpacing();

  // Normalizes all advances for spaces to the same width. This keeps windows
  // from making spaces after Hindi characters larger, which is then
  // inconsistent with our meaure of the width since WebKit doesn't include
  // spaces in text-runs sent to uniscribe unless white-space:pre.
  void AdjustSpaceAdvances();

  // Returns the total width of a single item.
  int AdvanceForItem(int item_index) const;

  // Shapes a run (pointed to by |input|) using |hfont| first.
  // Tries a series of fonts specified retrieved with NextWinFontData
  // and finally a font covering characters in |*input|. A string pointed
  // by |input| comes from ScriptItemize and is supposed to contain
  // characters belonging to a single script aside from characters
  // common to all scripts (e.g. space).
  bool Shape(const wchar_t* input,
             int item_length,
             int num_glyphs,
             SCRIPT_ITEM& run,
             Shaping& shaping);

  // Gets Windows font data for the next best font to try in the list
  // of fonts. When there's no more font available, returns false
  // without touching any of out params. Need to call ResetFontIndex
  // to start scanning of the font list from the beginning.
  virtual bool NextWinFontData(HFONT* hfont,
                               SCRIPT_CACHE** script_cache,
                               SCRIPT_FONTPROPERTIES** font_properties,
                               int* ascent) {
    return false;
  }

  // Resets the font index to the first in the list of fonts
  // to try after the primaryFont turns out not to work. With font_index
  // reset, NextWinFontData scans fallback fonts from the beginning.
  virtual void ResetFontIndex() {}

  // The input data for this run of Uniscribe. See the constructor.
  const wchar_t* input_;
  const int input_length_;
  const bool is_rtl_;

  // Windows font data for the primary font :
  // In a sense, logfont_ and style_ are redundant because
  // hfont_ contains all the information. However, invoking GetObject,
  // everytime we need the height and the style, is rather expensive so
  // that we cache them. Would it be better to add getter and (virtual)
  // setter for the height and the style of the primary font, instead of
  // logfont_? Then, a derived class ctor can set ascent_, height_ and style_
  // if they're known. Getters for them would have to 'infer' their values from
  // hfont_ ONLY when they're not set.
  HFONT hfont_;
  SCRIPT_CACHE* script_cache_;
  SCRIPT_FONTPROPERTIES* font_properties_;
  int ascent_;
  LOGFONT logfont_;
  int style_;

  // Options, see the getters/setters above.
  bool directional_override_;
  bool inhibit_ligate_;
  int letter_spacing_;
  int space_width_;
  int word_spacing_;
  int justification_width_;

  // Uniscribe breaks the text into Runs. These are one length of text that is
  // in one script and one direction. This array is in reading order.
  StackVector<SCRIPT_ITEM, UNISCRIBE_STATE_STACK_RUNS> runs_;

  StackVector<Shaping, UNISCRIBE_STATE_STACK_RUNS> shapes_;

  // This is a mapping between reading order and screen order for the items.
  // Uniscribe's items array are in reading order. For right-to-left text,
  // or mixed (although WebKit's |TextRun| should really be only one
  // direction), this makes it very difficult to compute character offsets
  // and positions. This list is in screen order from left to right, and
  // gives the index into the |runs_| and |shapes_| arrays of each
  // subsequent item.
  StackVector<int, UNISCRIBE_STATE_STACK_RUNS> screen_order_;

  DISALLOW_EVIL_CONSTRUCTORS(UniscribeState);
};

}  // namespace gfx

#endif  // BASE_GFX_UNISCRIBE_H__