Adds a hy-phen-ator.

This change adds a project file for the hyphen library and a Hyphenator class, which encapsulates the library. (This class is not integrated into Chrome, though.)

BUG=47083
TEST=HyphenatorTest.HyphenateWords
Review URL: https://chromiumcodereview.appspot.com/9545017

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@146964 0039d316-1c4b-4281-b951-d872f2087c98
author: hbono@chromium.org <hbono@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2012-07-17 08:56:59 +0000
committer: hbono@chromium.org <hbono@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2012-07-17 08:56:59 +0000
commit: 48cf2472324304e12d7138f14841f4b2d566e39b (patch)
tree: a58e97dfbe2a70bafd3b27b7346ca7cee8780315
parent: 99c4c707b2cf4e0096991c47dcaf8ac57bc52eaa (diff)
download: chromium_src-48cf2472324304e12d7138f14841f4b2d566e39b.zip
chromium_src-48cf2472324304e12d7138f14841f4b2d566e39b.tar.gz
chromium_src-48cf2472324304e12d7138f14841f4b2d566e39b.tar.bz2
11 files changed, 653 insertions, 8 deletions
diff --git a/content/content_renderer.gypi b/content/content_renderer.gypi
index 2d4a777..2e5ad41 100644
--- a/content/content_renderer.gypi
+++ b/content/content_renderer.gypi
@@ -10,6 +10,7 @@
     '../ppapi/ppapi_internal.gyp:ppapi_proxy',
     '../ppapi/ppapi_internal.gyp:ppapi_shared',
     '../skia/skia.gyp:skia',
+    '../third_party/hyphen/hyphen.gyp:hyphen',
     '../third_party/icu/icu.gyp:icuuc',
     '../third_party/icu/icu.gyp:icui18n',
     '../third_party/libjingle/libjingle.gyp:libjingle',
@@ -84,6 +85,8 @@
     'renderer/gpu/gpu_benchmarking_extension.h',
     'renderer/gpu/stream_texture_host_android.cc',
     'renderer/gpu/stream_texture_host_android.h',
+    'renderer/hyphenator/hyphenator.cc',
+    'renderer/hyphenator/hyphenator.h',
     'renderer/idle_user_detector.cc',
     'renderer/idle_user_detector.h',
     'renderer/input_tag_speech_dispatcher.cc',
diff --git a/content/content_tests.gypi b/content/content_tests.gypi
index 77b8f47..30f8bb5 100644
--- a/content/content_tests.gypi
+++ b/content/content_tests.gypi
@@ -323,6 +323,7 @@
         'renderer/android/email_detector_unittest.cc',
         'renderer/android/phone_number_detector_unittest.cc',
         'renderer/gpu/input_event_filter_unittest.cc',
+        'renderer/hyphenator/hyphenator_unittest.cc',
         'renderer/media/audio_device_unittest.cc',
         'renderer/media/audio_message_filter_unittest.cc',
         'renderer/media/audio_renderer_mixer_manager_unittest.cc',
diff --git a/content/renderer/hyphenator/hyphenator.cc b/content/renderer/hyphenator/hyphenator.cc
new file mode 100644
index 0000000..da92f9e
--- /dev/null
+++ b/content/renderer/hyphenator/hyphenator.cc
@@ -0,0 +1,231 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "content/renderer/hyphenator/hyphenator.h"
+
+#include "base/file_util.h"
+#include "base/logging.h"
+#include "base/memory/scoped_ptr.h"
+#include "base/string_util.h"
+#include "base/utf_string_conversions.h"
+#include "third_party/hyphen/hyphen.h"
+#include "unicode/uscript.h"
+
+namespace {
+
+// A class that converts a sequence of UTF-8 characters to UTF-16 ones and holds
+// only the length of converted UTF-16 characters. This class is used for
+// creating a mapping from the position of a UTF-8 string to a position of a
+// UTF-16 string without unnecessary conversions. Even though the following
+// snippet produces the same mapping, it needs to convert the same characters
+// many times. This class incrementally counts the number of converted UTF-16
+// characters to avoid this problem.
+//
+//   scoped_array<size_t> position(new size_t[text.length()]);
+//   for (size_t i = 0; i < text.length(); ++i)
+//     position[i] = UTF8ToUTF16(text.substr(0, i)).length();
+//
+class UTF16TextLength {
+ public:
+  UTF16TextLength();
+  ~UTF16TextLength();
+
+  // Returns the current position.
+  int utf16_length() const { return utf16_length_; }
+
+  // Appends one UTF-8 character to this converter and advances the converted
+  // position. This converter increases the position by one when it finishes
+  // reading a BMP character and increases by two when it finishes reading a
+  // non-BMP character.
+  void Append(char c);
+
+ private:
+  // The length of the converted UTF-16 text.
+  int utf16_length_;
+
+  // The buffer that stores UTF-8 characters being converted.
+  std::string utf8_text_;
+
+  DISALLOW_COPY_AND_ASSIGN(UTF16TextLength);
+};
+
+UTF16TextLength::UTF16TextLength()
+    : utf16_length_(0) {
+}
+
+UTF16TextLength::~UTF16TextLength() {
+}
+
+void UTF16TextLength::Append(char c) {
+  // Append the given character and try converting the UTF-8 characters in this
+  // buffer to Unicode codepoints. If this buffer includes a Unicode codepoint,
+  // get the number of UTF-16 characters representing this codepoint and advance
+  // the position.
+  int code = 0;
+  int index = 0;
+  utf8_text_.push_back(c);
+  U8_NEXT(utf8_text_.data(), index, static_cast<int>(utf8_text_.length()),
+          code);
+  if (code != U_SENTINEL) {
+    utf8_text_.clear();
+    utf16_length_ += U16_LENGTH(code);
+  }
+}
+
+// A class that encapsulates a hyphenation query. This class owns resources
+// temporarily needed for hyphenating one word, and deletes them when it is
+// deleted as listed in the following snippet.
+//
+//   std::vector<int> hyphens;
+//   Query query(UTF8ToUTF16("hyphenate"));
+//   query.Hyphenate(dict, &hyphens);
+//
+class Query {
+ public:
+  explicit Query(const string16& word);
+  ~Query();
+
+  // Hyphenates a word with the specified dictionary. This function hyphenates
+  // the word provided to its constructor and returns a list of hyphenation
+  // points, positions where we can insert hyphens.
+  bool Hyphenate(HyphenDict* dictionary, std::vector<int>* hyphen_offsets);
+
+ private:
+  // A word to be hyphenated.
+  std::string word_utf8_;
+
+  // Return variables from the hyphen library.
+  scoped_array<char> hyphen_vector_;
+  char** rep_;
+  int* pos_;
+  int* cut_;
+
+  DISALLOW_COPY_AND_ASSIGN(Query);
+};
+
+Query::Query(const string16& word)
+    : rep_(NULL),
+      pos_(NULL),
+      cut_(NULL) {
+  // Remove trailing punctuation characters. WebKit does not remove these
+  // characters when it hyphenates a word. These characters prevent the hyphen
+  // library from applying some rules, i.e. they prevent the library from adding
+  // hyphens.
+  DCHECK(!word.empty());
+  const char16* data = word.data();
+  int length = static_cast<int>(word.length());
+  while (length > 0) {
+    int previous = length;
+    int code = 0;
+    U16_PREV(data, 0, previous, code);
+    UErrorCode error = U_ZERO_ERROR;
+    if (uscript_getScript(code, &error) != USCRIPT_COMMON)
+      break;
+    length = previous;
+  }
+  UTF16ToUTF8(word.c_str(), length, &word_utf8_);
+  // Create a hyphen vector used by hnj_hyphen_hyphenate2(). We allocate a
+  // buffer of |word_utf8_.length()| + 5 as written in Line 112 of
+  // <http://cs.chromium.org/src/third_party/hyphen/hyphen.h>.
+  hyphen_vector_.reset(new char[word_utf8_.length() + 5]);
+}
+
+Query::~Query() {
+  if (rep_) {
+    for (size_t i = 0; i < word_utf8_.length(); ++i) {
+      if (rep_[i])
+        free(rep_[i]);
+    }
+    free(rep_);
+  }
+  if (pos_)
+    free(pos_);
+  if (cut_)
+    free(cut_);
+}
+
+bool Query::Hyphenate(HyphenDict* dictionary,
+                      std::vector<int>* hyphen_offsets) {
+  DCHECK(dictionary);
+  DCHECK(hyphen_offsets);
+
+  int error_code = hnj_hyphen_hyphenate2(dictionary,
+                                         word_utf8_.data(),
+                                         static_cast<int>(word_utf8_.length()),
+                                         hyphen_vector_.get(),
+                                         NULL,
+                                         &rep_,
+                                         &pos_,
+                                         &cut_);
+  if (error_code)
+    return false;
+
+  // WebKit needs hyphenation points counted in UTF-16 characters. On the other
+  // hand, the hyphen library returns hyphenation points counted in UTF-8
+  // characters. We incrementally convert hyphenation points in UTF-8
+  // characters to hyphenation points in UTF-16 characters and write the
+  // converted hyphenation points to the output vector.
+  UTF16TextLength text_length;
+  hyphen_offsets->clear();
+  for (size_t i = 0; i < word_utf8_.length(); ++i) {
+    text_length.Append(word_utf8_[i]);
+    if (hyphen_vector_[i] & 1)
+      hyphen_offsets->push_back(text_length.utf16_length());
+  }
+  return !hyphen_offsets->empty();
+}
+
+}  // namespace
+
+namespace content {
+
+Hyphenator::Hyphenator(base::PlatformFile file)
+    : dictionary_(NULL),
+      rule_file_(file),
+      result_(0) {
+}
+
+Hyphenator::~Hyphenator() {
+  if (dictionary_)
+    hnj_hyphen_free(dictionary_);
+}
+
+bool Hyphenator::Initialize() {
+  if (dictionary_)
+    return true;
+
+  rule_map_.reset(new file_util::MemoryMappedFile);
+  if (!rule_map_->Initialize(rule_file_))
+    return false;
+
+  dictionary_ = hnj_hyphen_load(rule_map_->data(), rule_map_->length());
+  return !!dictionary_;
+}
+
+size_t Hyphenator::ComputeLastHyphenLocation(const string16& word,
+                                             size_t before_index) {
+  if (!dictionary_ || word.empty())
+    return 0;
+
+  // Call the hyphen library to get all hyphenation points, i.e. positions where
+  // we can insert hyphens. When WebKit finds a line-break, it calls this
+  // function twice or more with the same word to find the best hyphenation
+  // point. To avoid calling the hyphen library twice or more with the same
+  // word, we cache the last query.
+  if (word_ != word) {
+    word_ = word;
+    Query query(word);
+    result_ = query.Hyphenate(dictionary_, &hyphen_offsets_);
+  }
+  if (!result_)
+    return 0;
+  for (std::vector<int>::reverse_iterator it = hyphen_offsets_.rbegin();
+       it != hyphen_offsets_.rend(); ++it) {
+    if (static_cast<size_t>(*it) < before_index)
+      return *it;
+  }
+  return 0;
+}
+
+}  // namespace content
diff --git a/content/renderer/hyphenator/hyphenator.h b/content/renderer/hyphenator/hyphenator.h
new file mode 100644
index 0000000..561af80
--- /dev/null
+++ b/content/renderer/hyphenator/hyphenator.h
@@ -0,0 +1,66 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef CONTENT_RENDERER_HYPHENATOR_HYPHENATOR_H_
+#define CONTENT_RENDERER_HYPHENATOR_HYPHENATOR_H_
+
+#include <vector>
+
+#include "base/memory/scoped_ptr.h"
+#include "base/platform_file.h"
+#include "base/string16.h"
+#include "content/common/content_export.h"
+
+namespace file_util {
+class MemoryMappedFile;
+}
+
+typedef struct _HyphenDict HyphenDict;
+
+namespace content {
+
+// A class that hyphenates a word. This class encapsulates the hyphen library
+// and manages resources used by the library. When this class uses a huge
+// dictionary, it takes lots of memory (~1.3MB for English). A renderer should
+// create this object only when it renders a page that needs hyphenation and
+// delete it when it moves to a page that does not need hyphenation.
+class CONTENT_EXPORT Hyphenator {
+ public:
+  explicit Hyphenator(base::PlatformFile file);
+  ~Hyphenator();
+
+  // Initializes the hyphen library and allocates resources needed for
+  // hyphenation.
+  bool Initialize();
+
+  // Returns the last hyphenation point, the position where we can insert a
+  // hyphen, before the given position. If there are not any hyphenation points,
+  // this function returns 0.
+  size_t ComputeLastHyphenLocation(const string16& word, size_t before_index);
+
+ private:
+  // The dictionary used by the hyphen library.
+  HyphenDict* dictionary_;
+
+  // The dictionary file and its memory-mapping object. (Our copy of the hyphen
+  // library uses a memory-mapped file opened by a browser so renderers can use
+  // it without opening the file.)
+  base::PlatformFile rule_file_;
+  scoped_ptr<file_util::MemoryMappedFile> rule_map_;
+
+  // A cached result. WebKit often calls ComputeLastHyphenLocation with the same
+  // word multiple times to find the best hyphenation point when it finds a line
+  // break. On the other hand, the hyphen library returns all hyphenation points
+  // for a word. This class caches the hyphenation points returned by the hyphen
+  // library to avoid calling the library multiple times.
+  string16 word_;
+  bool result_;
+  std::vector<int> hyphen_offsets_;
+
+  DISALLOW_COPY_AND_ASSIGN(Hyphenator);
+};
+
+}  // namespace content
+
+#endif  // CONTENT_RENDERER_HYPHENATOR_HYPHENATOR_H_
diff --git a/content/renderer/hyphenator/hyphenator_unittest.cc b/content/renderer/hyphenator/hyphenator_unittest.cc
new file mode 100644
index 0000000..84c1ce1
--- /dev/null
+++ b/content/renderer/hyphenator/hyphenator_unittest.cc
@@ -0,0 +1,90 @@
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "content/renderer/hyphenator/hyphenator.h"
+
+#include "base/path_service.h"
+#include "base/platform_file.h"
+#include "base/utf_string_conversions.h"
+#include "testing/gtest/include/gtest/gtest.h"
+#include "third_party/hyphen/hyphen.h"
+
+// A unit test for our hyphenator. This class loads a sample hyphenation
+// dictionary and hyphenates words.
+class HyphenatorTest : public testing::Test {
+ public:
+  HyphenatorTest() {
+    Initialize();
+  }
+
+  bool Initialize() {
+    FilePath dictionary_path;
+    if (!PathService::Get(base::DIR_SOURCE_ROOT, &dictionary_path))
+      return false;
+    dictionary_path = dictionary_path.AppendASCII("third_party");
+    dictionary_path = dictionary_path.AppendASCII("hyphen");
+    dictionary_path = dictionary_path.AppendASCII("hyph_en_US.dic");
+    base::PlatformFile file = base::CreatePlatformFile(
+        dictionary_path, base::PLATFORM_FILE_OPEN | base::PLATFORM_FILE_READ,
+        NULL, NULL);
+    hyphenator_.reset(new content::Hyphenator(file));
+    return hyphenator_->Initialize();
+  }
+
+  // Creates a human-readable hyphenated word. This function inserts '-'
+  // characters at all places where we can insert hyphens to improve the
+  // readability of this unit test.
+  string16 Hyphenate(const string16& word) {
+    string16 hyphenated_word(word);
+    size_t position = word.length();
+    while (position > 0) {
+      size_t new_position = hyphenator_->ComputeLastHyphenLocation(word,
+                                                                   position);
+      EXPECT_LT(new_position, position);
+      if (new_position > 0)
+        hyphenated_word.insert(new_position, 1, '-');
+      position = new_position;
+    }
+    return hyphenated_word;
+  }
+
+ private:
+  scoped_ptr<content::Hyphenator> hyphenator_;
+};
+
+// Verifies that our hyphenator yields the same hyphenated words as the original
+// hyphen library does.
+TEST_F(HyphenatorTest, HyphenateWords) {
+  static const struct {
+    const char* input;
+    const char* expected;
+  } kTestCases[] = {
+    { "and", "and" },
+    { "concupiscent,", "con-cu-pis-cent," },
+    { "evidence.", "ev-i-dence." },
+    { "first", "first" },
+    { "getting", "get-ting" },
+    { "hedgehog", "hedge-hog" },
+    { "remarkable", "re-mark-able" },
+    { "straightened", "straight-ened" },
+    { "undid", "un-did" },
+    { "were", "were" },
+    { "Simply", "Sim-ply" },
+    { "Undone.", "Un-done." },
+    { "comfortably", "com-fort-ably"},
+    { "declination", "dec-li-na-tion" },
+    { "flamingo:", "flamin-go:" },
+    { "lination", "lina-tion" },
+    { "reciprocity", "rec-i-proc-i-ty" },
+    { "throughout", "through-out" },
+    { "undid", "un-did" },
+    { "undone.", "un-done." },
+    { "unnecessary", "un-nec-es-sary" },
+  };
+  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(kTestCases); ++i) {
+    string16 input = ASCIIToUTF16(kTestCases[i].input);
+    string16 expected = ASCIIToUTF16(kTestCases[i].expected);
+    EXPECT_EQ(expected, Hyphenate(input));
+  }
+}
diff --git a/third_party/hyphen/README.chromium b/third_party/hyphen/README.chromium
index 4cbb02e..7cf556b 100644
--- a/third_party/hyphen/README.chromium
+++ b/third_party/hyphen/README.chromium
@@ -1,8 +1,15 @@
 Name: hyphen
 URL: http://sourceforge.net/projects/hunspell/files/Hyphen/
 Version: 2.6
+License File: COPYING
+Security Critical: yes
 
 Description:
-This is a partial copy of Hyphen 2.6.
+This is a partial copy of Hyphen 2.6 with the following changes:
+* Change the input params of hnj_hyphen_load to receive the pointer to a ruleset
+  instead of a file path.
+* Change RIGHTHYPHENMIN to 2 in hyph_en_US.dic so it hyphenates rec-i-proc-i-ty
+  as expected.
+The patch is in google.patch.
 
 See 'hyphen.tex' for additional requirements regarding that file. 
 \ No newline at end of file
diff --git a/third_party/hyphen/google.patch b/third_party/hyphen/google.patch
new file mode 100644
index 0000000..bca4d2f
--- /dev/null
+++ b/third_party/hyphen/google.patch
@@ -0,0 +1,148 @@
+? google.patch
+Index: hyphen.c
+===================================================================
+RCS file: /cvsroot/hunspell/hyphen/hyphen.c,v
+retrieving revision 1.4
+diff -u -r1.4 hyphen.c
+--- hyphen.c	1 Dec 2010 01:30:20 -0000	1.4
++++ hyphen.c	1 Mar 2012 05:18:32 -0000
+@@ -242,12 +242,71 @@
+ }
+ #endif
+ 
++#ifdef HYPHEN_CHROME_CLIENT
++typedef struct {
++  const unsigned char *data;
++  size_t offset;
++  size_t size;
++} hnj_file;
++
++static hnj_file *
++hnj_fopen (const unsigned char *data, size_t size)
++{
++  hnj_file *f;
++
++  f = hnj_malloc (sizeof(hnj_file));
++  if (f == NULL)
++    return NULL;
++  f->offset = 0;
++  f->data = data;
++  f->size = size;
++  return f;
++}
++
++static void
++hnj_fclose (hnj_file *f)
++{
++  hnj_free (f);
++}
++
++static char *
++hnj_fgets (char *s, int size, hnj_file *f)
++{
++  int i;
++
++  if (f->offset >= f->size)
++    return NULL;
++  for (i = 0; i < size - 1; i++) {
++    char c;
++
++    if (f->offset >= f->size)
++      break;
++    c = f->data[f->offset++];
++    if (c == '\r' || c == '\n')
++      break;
++    s[i] = c;
++  }
++  s[i] = '\0';
++  return s;
++}
++#else
++typedef FILE hnj_file;
++#define hnj_fopen(fn, mode) fopen((fn), (mode))
++#define hnj_fclose(f) fclose(f)
++#define hnj_fgets(s, size, f) fgets((s), (size), (f))
++#endif
++
++#ifdef HYPHEN_CHROME_CLIENT
++HyphenDict *
++hnj_hyphen_load (const unsigned char *data, size_t size)
++#else
+ HyphenDict *
+ hnj_hyphen_load (const char *fn)
++#endif
+ {
+   HyphenDict *dict[2];
+   HashTab *hashtab;
+-  FILE *f;
++  hnj_file *f;
+   char buf[MAX_CHARS];
+   char word[MAX_CHARS];
+   char pattern[MAX_CHARS];
+@@ -261,7 +320,11 @@
+   HashEntry *e;
+   int nextlevel = 0;
+ 
++#ifdef HYPHEN_CHROME_CLIENT
++  f = hnj_fopen (data, size);
++#else
+   f = fopen (fn, "r");
++#endif
+   if (f == NULL)
+     return NULL;
+ 
+@@ -291,7 +354,7 @@
+   /* read in character set info */
+   if (k == 0) {
+     for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0;
+-    if (fgets(dict[k]->cset,  sizeof(dict[k]->cset),f) != NULL) {
++    if (hnj_fgets(dict[k]->cset,  sizeof(dict[k]->cset),f) != NULL) {
+       for (i=0;i<MAX_NAME;i++)
+         if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n'))
+           dict[k]->cset[i] = 0;
+@@ -304,7 +367,7 @@
+     dict[k]->utf8 = dict[0]->utf8;
+   }
+ 
+-  while (fgets (buf, sizeof(buf), f) != NULL)
++  while (hnj_fgets (buf, sizeof(buf), f) != NULL)
+     {
+       if (buf[0] != '%')
+ 	{
+@@ -385,7 +448,7 @@
+             if (dict[k]->utf8) {
+                 int pu = -1;        /* unicode character position */
+                 int ps = -1;        /* unicode start position (original replindex) */
+-                int pc = (*word == '.') ? 1: 0; /* 8-bit character position */
++                size_t pc = (*word == '.') ? 1: 0; /* 8-bit character position */
+                 for (; pc < (strlen(word) + 1); pc++) {
+                 /* beginning of an UTF-8 character (not '10' start bits) */
+                     if ((((unsigned char) word[pc]) >> 6) != 2) pu++;
+@@ -478,7 +541,7 @@
+ #endif
+   state_num = 0;
+ }
+-  fclose(f);
++  hnj_fclose(f);
+   if (k == 2) dict[0]->nextlevel = dict[1];
+   return dict[0];
+ }
+Index: hyphen.h
+===================================================================
+RCS file: /cvsroot/hunspell/hyphen/hyphen.h,v
+retrieving revision 1.2
+diff -u -r1.2 hyphen.h
+--- hyphen.h	27 Nov 2010 02:20:33 -0000	1.2
++++ hyphen.h	1 Mar 2012 05:18:33 -0000
+@@ -93,7 +93,11 @@
+   int new_state;
+ };
+ 
++#ifdef HYPHEN_CHROME_CLIENT
++HyphenDict *hnj_hyphen_load (const unsigned char *data, size_t size);
++#else
+ HyphenDict *hnj_hyphen_load (const char *fn);
++#endif
+ void hnj_hyphen_free (HyphenDict *dict);
+ 
+ /* obsolete, use hnj_hyphen_hyphenate2() or *hyphenate3() functions) */
diff --git a/third_party/hyphen/hyph_en_US.dic b/third_party/hyphen/hyph_en_US.dic
index e38cbce..3baa02d 100644
--- a/third_party/hyphen/hyph_en_US.dic
+++ b/third_party/hyphen/hyph_en_US.dic
@@ -1,6 +1,6 @@
 UTF-8
 LEFTHYPHENMIN 2
-RIGHTHYPHENMIN 3
+RIGHTHYPHENMIN 2
 COMPOUNDLEFTHYPHENMIN 2
 COMPOUNDRIGHTHYPHENMIN 3
 1'.
diff --git a/third_party/hyphen/hyphen.c b/third_party/hyphen/hyphen.c
index 26fbefd..6b9cb78 100644
--- a/third_party/hyphen/hyphen.c
+++ b/third_party/hyphen/hyphen.c
@@ -242,12 +242,71 @@ get_state_str (int state)
 }
 #endif
 
+#ifdef HYPHEN_CHROME_CLIENT
+typedef struct {
+  const unsigned char *data;
+  size_t offset;
+  size_t size;
+} hnj_file;
+
+static hnj_file *
+hnj_fopen (const unsigned char *data, size_t size)
+{
+  hnj_file *f;
+
+  f = hnj_malloc (sizeof(hnj_file));
+  if (f == NULL)
+    return NULL;
+  f->offset = 0;
+  f->data = data;
+  f->size = size;
+  return f;
+}
+
+static void
+hnj_fclose (hnj_file *f)
+{
+  hnj_free (f);
+}
+
+static char *
+hnj_fgets (char *s, int size, hnj_file *f)
+{
+  int i;
+
+  if (f->offset >= f->size)
+    return NULL;
+  for (i = 0; i < size - 1; i++) {
+    char c;
+
+    if (f->offset >= f->size)
+      break;
+    c = f->data[f->offset++];
+    if (c == '\r' || c == '\n')
+      break;
+    s[i] = c;
+  }
+  s[i] = '\0';
+  return s;
+}
+#else
+typedef FILE hnj_file;
+#define hnj_fopen(fn, mode) fopen((fn), (mode))
+#define hnj_fclose(f) fclose(f)
+#define hnj_fgets(s, size, f) fgets((s), (size), (f))
+#endif
+
+#ifdef HYPHEN_CHROME_CLIENT
+HyphenDict *
+hnj_hyphen_load (const unsigned char *data, size_t size)
+#else
 HyphenDict *
 hnj_hyphen_load (const char *fn)
+#endif
 {
   HyphenDict *dict[2];
   HashTab *hashtab;
-  FILE *f;
+  hnj_file *f;
   char buf[MAX_CHARS];
   char word[MAX_CHARS];
   char pattern[MAX_CHARS];
@@ -261,7 +320,11 @@ hnj_hyphen_load (const char *fn)
   HashEntry *e;
   int nextlevel = 0;
 
-  f = fopen (fn, "r");
+#ifdef HYPHEN_CHROME_CLIENT
+  f = hnj_fopen (data, size);
+#else
+  f = hnj_fopen (fn, "r");
+#endif
   if (f == NULL)
     return NULL;
 
@@ -289,7 +352,7 @@ for (k = 0; k == 0 || (k == 1 && nextlevel); k++) {
   /* read in character set info */
   if (k == 0) {
     for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0;
-    if (fgets(dict[k]->cset,  sizeof(dict[k]->cset),f) != NULL) {
+    if (hnj_fgets(dict[k]->cset,  sizeof(dict[k]->cset),f) != NULL) {
       for (i=0;i<MAX_NAME;i++)
         if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n'))
           dict[k]->cset[i] = 0;
@@ -302,7 +365,7 @@ for (k = 0; k == 0 || (k == 1 && nextlevel); k++) {
     dict[k]->utf8 = dict[0]->utf8;
   }
 
-  while (fgets (buf, sizeof(buf), f) != NULL)
+  while (hnj_fgets (buf, sizeof(buf), f) != NULL)
     {
       if (buf[0] != '%')
 	{
@@ -368,7 +431,7 @@ for (k = 0; k == 0 || (k == 1 && nextlevel); k++) {
             if (dict[k]->utf8) {
                 int pu = -1;        /* unicode character position */
                 int ps = -1;        /* unicode start position (original replindex) */
-                int pc = (*word == '.') ? 1: 0; /* 8-bit character position */
+                size_t pc = (*word == '.') ? 1: 0; /* 8-bit character position */
                 for (; pc < (strlen(word) + 1); pc++) {
                 /* beginning of an UTF-8 character (not '10' start bits) */
                     if ((((unsigned char) word[pc]) >> 6) != 2) pu++;
@@ -461,7 +524,7 @@ for (k = 0; k == 0 || (k == 1 && nextlevel); k++) {
 #endif
   state_num = 0;
 }
-  fclose(f);
+  hnj_fclose(f);
   if (k == 2) dict[0]->nextlevel = dict[1];
   return dict[0];
 }
diff --git a/third_party/hyphen/hyphen.gyp b/third_party/hyphen/hyphen.gyp
new file mode 100644
index 0000000..35becc4
--- /dev/null
+++ b/third_party/hyphen/hyphen.gyp
@@ -0,0 +1,32 @@
+# Copyright (c) 2012 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+{
+  'targets': [
+    {
+      'target_name': 'hyphen',
+      'type': '<(library)',
+      'include_dirs': [
+        '.',
+      ],
+      'defines': [
+        'HYPHEN_CHROME_CLIENT',
+      ],
+      'sources': [
+        'hnjalloc.c',
+        'hnjalloc.h',
+        'hyphen.h',
+        'hyphen.c',
+      ],
+      'direct_dependent_settings': {
+        'defines': [
+          'HYPHEN_CHROME_CLIENT',
+        ],
+        'include_dirs': [
+          '.',
+        ],
+      },
+    },
+  ],
+}
diff --git a/third_party/hyphen/hyphen.h b/third_party/hyphen/hyphen.h
index 5d79308..b5517d3 100644
--- a/third_party/hyphen/hyphen.h
+++ b/third_party/hyphen/hyphen.h
@@ -90,7 +90,11 @@ struct _HyphenTrans {
   int new_state;
 };
 
+#ifdef HYPHEN_CHROME_CLIENT
+HyphenDict *hnj_hyphen_load (const unsigned char *data, size_t size);
+#else
 HyphenDict *hnj_hyphen_load (const char *fn);
+#endif
 void hnj_hyphen_free (HyphenDict *dict);
 
 /* obsolete, use hnj_hyphen_hyphenate2() or *hyphenate3() functions) */
author	hbono@chromium.org <hbono@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2012-07-17 08:56:59 +0000
committer	hbono@chromium.org <hbono@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98>	2012-07-17 08:56:59 +0000
commit	48cf2472324304e12d7138f14841f4b2d566e39b (patch)
tree	a58e97dfbe2a70bafd3b27b7346ca7cee8780315
parent	99c4c707b2cf4e0096991c47dcaf8ac57bc52eaa (diff)
download	chromium_src-48cf2472324304e12d7138f14841f4b2d566e39b.zip chromium_src-48cf2472324304e12d7138f14841f4b2d566e39b.tar.gz chromium_src-48cf2472324304e12d7138f14841f4b2d566e39b.tar.bz2