Add chrome to the repository.

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@15 0039d316-1c4b-4281-b951-d872f2087c98
author: initial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98> 2008-07-26 23:55:29 +0000
committer: initial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98> 2008-07-26 23:55:29 +0000
commit: 09911bf300f1a419907a9412154760efd0b7abc3 (patch)
tree: f131325fb4e2ad12c6d3504ab75b16dd92facfed /chrome/browser/spellcheck_worditerator.h
parent: 586acc5fe142f498261f52c66862fa417c3d52d2 (diff)
download: chromium_src-09911bf300f1a419907a9412154760efd0b7abc3.zip
chromium_src-09911bf300f1a419907a9412154760efd0b7abc3.tar.gz
chromium_src-09911bf300f1a419907a9412154760efd0b7abc3.tar.bz2
1 files changed, 207 insertions, 0 deletions
diff --git a/chrome/browser/spellcheck_worditerator.h b/chrome/browser/spellcheck_worditerator.h
new file mode 100644
index 0000000..02792ddc
--- /dev/null
+++ b/chrome/browser/spellcheck_worditerator.h
@@ -0,0 +1,207 @@
+// Copyright 2008, Google Inc.
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//    * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//    * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//    * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef CHROME_BROWSER_SPELLCHECK_WORDITERATOR_H__
+#define CHROME_BROWSER_SPELLCHECK_WORDITERATOR_H__
+
+#include <map>
+#include <string>
+
+#include "base/basictypes.h"
+
+#include "unicode/uscript.h"
+
+// A class which handles character attributes dependent on a spellchecker and
+// its dictionary.
+// This class is used by the SpellcheckWordIterator class to determine whether
+// or not a character is one used by the spellchecker and its dictinary.
+class SpellcheckCharAttribute {
+ public:
+  SpellcheckCharAttribute();
+
+  ~SpellcheckCharAttribute();
+
+  // Sets the default language of the spell checker. This controls which
+  // characters are considered parts of words of the given language.
+  void SetDefaultLanguage(const std::wstring& language);
+
+  // Returns whether or not the given character is a character used by the
+  // selected dictionary.
+  // Parameters
+  //   * character [in] (UChar32)
+  //     Represents a Unicode character to be checked.
+  // Return values
+  //   * true
+  //     The given character is a word character.
+  //   * false
+  //     The given character is not a word character.
+  bool IsWordChar(UChar32 character) const;
+
+  // Returns whether or not the given character is a character used by
+  // contractions.
+  // Parameters
+  //   * character [in] (UChar32)
+  //     Represents a Unicode character to be checked.
+  // Return values
+  //   * true
+  //     The given character is a character used by contractions.
+  //   * false
+  //     The given character is not a character used by contractions.
+  bool IsContractionChar(UChar32 character) const;
+
+ private:
+  // Initializes the mapping table.
+  void InitializeScriptTable();
+
+  // Retrieves the ICU script code.
+  UScriptCode GetScriptCode(UChar32 character) const;
+
+  // Updates an entry in the mapping table.
+  void SetWordScript(const int script_code, bool in_use);
+
+  // Returns whether or not the given script is used by the selected
+  // dictionary.
+  bool IsWordScript(const UScriptCode script_code) const;
+
+ private:
+  // Represents a mapping table from a script code to a boolean value
+  // representing whether or not the script is used by the selected dictionary.
+  bool script_attributes_[USCRIPT_CODE_LIMIT];
+
+  // Represents a table of characters used by contractions.
+  std::map<UChar32, bool> middle_letters_;
+
+  DISALLOW_EVIL_CONSTRUCTORS(SpellcheckCharAttribute);
+};
+
+// A class which implements methods for finding the location of word boundaries
+// used by the Spellchecker class.
+// This class is implemented on the following assumptions:
+//   * An input string is encoded in UTF-16 (i.e. it may contain surrogate
+//     pairs), and;
+//   * The length of a string is the number of UTF-16 characters in the string
+//     (i.e. the length of a non-BMP character becomes two).
+class SpellcheckWordIterator {
+ public:
+  SpellcheckWordIterator();
+
+  ~SpellcheckWordIterator();
+
+  // Initializes a word-iterator object.
+  // Parameters
+  //   * attribute [in] (const SpellcheckCharAttribute*)
+  //     Represents a set of character attributes used for filtering out
+  //     non-word characters.
+  //   * word [in] (const wchar_t*)
+  //     Represents a string from which this object extracts words.
+  //     (This string does not have to be NUL-terminated.)
+  //   * length [in] (size_t)
+  //     Represents the length of the given string, in UTF-16 characters.
+  //     This value should not include terminating NUL characters.
+  //   * allow_contraction [in] (bool)
+  //     Represents a flag to control whether or not this object should split a
+  //     possible contraction (e.g. "isn't", "in'n'out", etc.)
+  // Return values
+  //   * true
+  //     This word-iterator object is initialized successfully.
+  //   * false
+  //     An error occured while initializing this object.
+  void Initialize(const SpellcheckCharAttribute* attribute,
+                  const wchar_t* word,
+                  size_t length,
+                  bool allow_contraction);
+
+  // Retrieves a word (or a contraction).
+  // Parameters
+  //   * word_string [out] (std::wstring*)
+  //     Represents a word (or a contraction) to be checked its spelling.
+  //     This |word_string| has been already normalized to its canonical form
+  //     (i.e. decomposed ligatures, replaced full-width latin characters to
+  //     its ASCII alternatives, etc.) so that a SpellChecker object can check
+  //     its spelling without any additional operations.
+  //     On the other hand, a substring of the input string
+  //       std::wstring str(&word[word_start], word_length);
+  //     represents the non-normalized version of this extracted word.
+  //   * word_start [out] (int*)
+  //     Represents the offset of this word from the beginning of the input
+  //     string, in UTF-16 characters.
+  //   * word_length [out] (int*)
+  //     Represents the length of an extracted word before normalization, in
+  //     UTF-16 characters.
+  //     When the input string contains ligatures, this value may not be equal
+  //     to the length of the |word_string|.
+  // Return values
+  //   * true
+  //     Found a word (or a contraction) to be checked its spelling.
+  //   * false
+  //     Not found any more words or contractions to be checked their spellings.
+  bool GetNextWord(std::wstring* word_string,
+                   int* word_start,
+                   int* word_length);
+
+ private:
+  // Retrieves a segment consisting of word characters (and contraction
+  // characters if the |allow_contraction| value is true).
+  void GetSegment(int* segment_start,
+                  int* segment_end);
+
+  // Discards non-word characters at the beginning and the end of the given
+  // segment.
+  void TrimSegment(int segment_start,
+                   int segment_end,
+                   int* word_start,
+                   int* word_length) const;
+
+  // Normalizes the given segment of the |word_| variable and write its
+  // canonical form to the |output_string|.
+  bool Normalize(int input_start,
+                 int input_length,
+                 std::wstring* output_string) const;
+
+ private:
+  // The pointer to the input string from which we are extracting words.
+  const wchar_t* word_;
+
+  // The length of the original string.
+  int length_;
+
+  // The current position in the original string.
+  int position_;
+
+  // The flag to control whether or not this object should extract possible
+  // contractions.
+  bool allow_contraction_;
+
+  // The character attributes used for filtering out non-word characters.
+  const SpellcheckCharAttribute* attribute_;
+
+  DISALLOW_EVIL_CONSTRUCTORS(SpellcheckWordIterator);
+};
+
+#endif  // CHROME_BROWSER_SPELLCHECK_WORDITERATOR_H__
author	initial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98>	2008-07-26 23:55:29 +0000
committer	initial.commit <initial.commit@0039d316-1c4b-4281-b951-d872f2087c98>	2008-07-26 23:55:29 +0000
commit	09911bf300f1a419907a9412154760efd0b7abc3 (patch)
tree	f131325fb4e2ad12c6d3504ab75b16dd92facfed /chrome/browser/spellcheck_worditerator.h
parent	586acc5fe142f498261f52c66862fa417c3d52d2 (diff)
download	chromium_src-09911bf300f1a419907a9412154760efd0b7abc3.zip chromium_src-09911bf300f1a419907a9412154760efd0b7abc3.tar.gz chromium_src-09911bf300f1a419907a9412154760efd0b7abc3.tar.bz2