summary | refs | log | tree | commit | diff | stats
path: root/third_party/hyphen
diff options
context:
space:
mode:
author: hbono@chromium.org <hbono@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2012-07-17 08:56:59 +0000
committer: hbono@chromium.org <hbono@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> 2012-07-17 08:56:59 +0000
commit: 48cf2472324304e12d7138f14841f4b2d566e39b (patch)
tree: a58e97dfbe2a70bafd3b27b7346ca7cee8780315 /third_party/hyphen
parent: 99c4c707b2cf4e0096991c47dcaf8ac57bc52eaa (diff)
download: chromium_src-48cf2472324304e12d7138f14841f4b2d566e39b.zip
chromium_src-48cf2472324304e12d7138f14841f4b2d566e39b.tar.gz
chromium_src-48cf2472324304e12d7138f14841f4b2d566e39b.tar.bz2
Adds a hy-phen-ator.
This change adds a project file for the hyphen library and a Hyphenator class, which encapsulates the library. (This class is not integrated into Chrome, though.)

BUG=47083
TEST=HyphenatorTest.HyphenateWords
Review URL: https://chromiumcodereview.appspot.com/9545017

git-svn-id: svn://svn.chromium.org/chrome/trunk/src@146964 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'third_party/hyphen')
-rw-r--r-- third_party/hyphen/README.chromium | 9
-rw-r--r-- third_party/hyphen/google.patch | 148
-rw-r--r-- third_party/hyphen/hyph_en_US.dic | 2
-rw-r--r-- third_party/hyphen/hyphen.c | 75
-rw-r--r-- third_party/hyphen/hyphen.gyp | 32
-rw-r--r-- third_party/hyphen/hyphen.h | 4
6 files changed, 262 insertions, 8 deletions
diff --git a/third_party/hyphen/README.chromium b/third_party/hyphen/README.chromium
index 4cbb02e..7cf556b 100644
--- a/third_party/hyphen/README.chromium
+++ b/third_party/hyphen/README.chromium
@@ -1,8 +1,15 @@
Name: hyphen
URL: http://sourceforge.net/projects/hunspell/files/Hyphen/
Version: 2.6
+License File: COPYING
+Security Critical: yes
Description:
-This is a partial copy of Hyphen 2.6.
+This is a partial copy of Hyphen 2.6 with the following changes:
+* Change the input params of hnj_hyphen_load to receive the pointer to a ruleset
+ instead of a file path.
+* Change RIGHTHYPHENMIN to 2 in hyph_en_US.dic so it hyphenates rec-i-proc-i-ty
+ as expected.
+The patch is in google.patch.
See 'hyphen.tex' for additional requirements regarding that file.
\ No newline at end of file
diff --git a/third_party/hyphen/google.patch b/third_party/hyphen/google.patch
new file mode 100644
index 0000000..bca4d2f
--- /dev/null
+++ b/third_party/hyphen/google.patch
@@ -0,0 +1,148 @@
+? google.patch
+Index: hyphen.c
+===================================================================
+RCS file: /cvsroot/hunspell/hyphen/hyphen.c,v
+retrieving revision 1.4
+diff -u -r1.4 hyphen.c
+--- hyphen.c 1 Dec 2010 01:30:20 -0000 1.4
++++ hyphen.c 1 Mar 2012 05:18:32 -0000
+@@ -242,12 +242,71 @@
+ }
+ #endif
+
++#ifdef HYPHEN_CHROME_CLIENT
++typedef struct {
++ const unsigned char *data;
++ size_t offset;
++ size_t size;
++} hnj_file;
++
++static hnj_file *
++hnj_fopen (const unsigned char *data, size_t size)
++{
++ hnj_file *f;
++
++ f = hnj_malloc (sizeof(hnj_file));
++ if (f == NULL)
++ return NULL;
++ f->offset = 0;
++ f->data = data;
++ f->size = size;
++ return f;
++}
++
++static void
++hnj_fclose (hnj_file *f)
++{
++ hnj_free (f);
++}
++
++static char *
++hnj_fgets (char *s, int size, hnj_file *f)
++{
++ int i;
++
++ if (f->offset >= f->size)
++ return NULL;
++ for (i = 0; i < size - 1; i++) {
++ char c;
++
++ if (f->offset >= f->size)
++ break;
++ c = f->data[f->offset++];
++ if (c == '\r' || c == '\n')
++ break;
++ s[i] = c;
++ }
++ s[i] = '\0';
++ return s;
++}
++#else
++typedef FILE hnj_file;
++#define hnj_fopen(fn, mode) fopen((fn), (mode))
++#define hnj_fclose(f) fclose(f)
++#define hnj_fgets(s, size, f) fgets((s), (size), (f))
++#endif
++
++#ifdef HYPHEN_CHROME_CLIENT
++HyphenDict *
++hnj_hyphen_load (const unsigned char *data, size_t size)
++#else
+ HyphenDict *
+ hnj_hyphen_load (const char *fn)
++#endif
+ {
+ HyphenDict *dict[2];
+ HashTab *hashtab;
+- FILE *f;
++ hnj_file *f;
+ char buf[MAX_CHARS];
+ char word[MAX_CHARS];
+ char pattern[MAX_CHARS];
+@@ -261,7 +320,11 @@
+ HashEntry *e;
+ int nextlevel = 0;
+
++#ifdef HYPHEN_CHROME_CLIENT
++ f = hnj_fopen (data, size);
++#else
+ f = fopen (fn, "r");
++#endif
+ if (f == NULL)
+ return NULL;
+
+@@ -291,7 +354,7 @@
+ /* read in character set info */
+ if (k == 0) {
+ for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0;
+- if (fgets(dict[k]->cset, sizeof(dict[k]->cset),f) != NULL) {
++ if (hnj_fgets(dict[k]->cset, sizeof(dict[k]->cset),f) != NULL) {
+ for (i=0;i<MAX_NAME;i++)
+ if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n'))
+ dict[k]->cset[i] = 0;
+@@ -304,7 +367,7 @@
+ dict[k]->utf8 = dict[0]->utf8;
+ }
+
+- while (fgets (buf, sizeof(buf), f) != NULL)
++ while (hnj_fgets (buf, sizeof(buf), f) != NULL)
+ {
+ if (buf[0] != '%')
+ {
+@@ -385,7 +448,7 @@
+ if (dict[k]->utf8) {
+ int pu = -1; /* unicode character position */
+ int ps = -1; /* unicode start position (original replindex) */
+- int pc = (*word == '.') ? 1: 0; /* 8-bit character position */
++ size_t pc = (*word == '.') ? 1: 0; /* 8-bit character position */
+ for (; pc < (strlen(word) + 1); pc++) {
+ /* beginning of an UTF-8 character (not '10' start bits) */
+ if ((((unsigned char) word[pc]) >> 6) != 2) pu++;
+@@ -478,7 +541,7 @@
+ #endif
+ state_num = 0;
+ }
+- fclose(f);
++ hnj_fclose(f);
+ if (k == 2) dict[0]->nextlevel = dict[1];
+ return dict[0];
+ }
+Index: hyphen.h
+===================================================================
+RCS file: /cvsroot/hunspell/hyphen/hyphen.h,v
+retrieving revision 1.2
+diff -u -r1.2 hyphen.h
+--- hyphen.h 27 Nov 2010 02:20:33 -0000 1.2
++++ hyphen.h 1 Mar 2012 05:18:33 -0000
+@@ -93,7 +93,11 @@
+ int new_state;
+ };
+
++#ifdef HYPHEN_CHROME_CLIENT
++HyphenDict *hnj_hyphen_load (const unsigned char *data, size_t size);
++#else
+ HyphenDict *hnj_hyphen_load (const char *fn);
++#endif
+ void hnj_hyphen_free (HyphenDict *dict);
+
+ /* obsolete, use hnj_hyphen_hyphenate2() or *hyphenate3() functions) */
diff --git a/third_party/hyphen/hyph_en_US.dic b/third_party/hyphen/hyph_en_US.dic
index e38cbce..3baa02d 100644
--- a/third_party/hyphen/hyph_en_US.dic
+++ b/third_party/hyphen/hyph_en_US.dic
@@ -1,6 +1,6 @@
UTF-8
LEFTHYPHENMIN 2
-RIGHTHYPHENMIN 3
+RIGHTHYPHENMIN 2
COMPOUNDLEFTHYPHENMIN 2
COMPOUNDRIGHTHYPHENMIN 3
1'.
diff --git a/third_party/hyphen/hyphen.c b/third_party/hyphen/hyphen.c
index 26fbefd..6b9cb78 100644
--- a/third_party/hyphen/hyphen.c
+++ b/third_party/hyphen/hyphen.c
@@ -242,12 +242,71 @@ get_state_str (int state)
}
#endif
+#ifdef HYPHEN_CHROME_CLIENT
+typedef struct {
+ const unsigned char *data;
+ size_t offset;
+ size_t size;
+} hnj_file;
+
+static hnj_file *
+hnj_fopen (const unsigned char *data, size_t size)
+{
+ hnj_file *f;
+
+ f = hnj_malloc (sizeof(hnj_file));
+ if (f == NULL)
+ return NULL;
+ f->offset = 0;
+ f->data = data;
+ f->size = size;
+ return f;
+}
+
+static void
+hnj_fclose (hnj_file *f)
+{
+ hnj_free (f);
+}
+
+static char *
+hnj_fgets (char *s, int size, hnj_file *f)
+{
+ int i;
+
+ if (f->offset >= f->size)
+ return NULL;
+ for (i = 0; i < size - 1; i++) {
+ char c;
+
+ if (f->offset >= f->size)
+ break;
+ c = f->data[f->offset++];
+ if (c == '\r' || c == '\n')
+ break;
+ s[i] = c;
+ }
+ s[i] = '\0';
+ return s;
+}
+#else
+typedef FILE hnj_file;
+#define hnj_fopen(fn, mode) fopen((fn), (mode))
+#define hnj_fclose(f) fclose(f)
+#define hnj_fgets(s, size, f) fgets((s), (size), (f))
+#endif
+
+#ifdef HYPHEN_CHROME_CLIENT
+HyphenDict *
+hnj_hyphen_load (const unsigned char *data, size_t size)
+#else
HyphenDict *
hnj_hyphen_load (const char *fn)
+#endif
{
HyphenDict *dict[2];
HashTab *hashtab;
- FILE *f;
+ hnj_file *f;
char buf[MAX_CHARS];
char word[MAX_CHARS];
char pattern[MAX_CHARS];
@@ -261,7 +320,11 @@ hnj_hyphen_load (const char *fn)
HashEntry *e;
int nextlevel = 0;
- f = fopen (fn, "r");
+#ifdef HYPHEN_CHROME_CLIENT
+ f = hnj_fopen (data, size);
+#else
+ f = hnj_fopen (fn, "r");
+#endif
if (f == NULL)
return NULL;
@@ -289,7 +352,7 @@ for (k = 0; k == 0 || (k == 1 && nextlevel); k++) {
/* read in character set info */
if (k == 0) {
for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0;
- if (fgets(dict[k]->cset, sizeof(dict[k]->cset),f) != NULL) {
+ if (hnj_fgets(dict[k]->cset, sizeof(dict[k]->cset),f) != NULL) {
for (i=0;i<MAX_NAME;i++)
if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n'))
dict[k]->cset[i] = 0;
@@ -302,7 +365,7 @@ for (k = 0; k == 0 || (k == 1 && nextlevel); k++) {
dict[k]->utf8 = dict[0]->utf8;
}
- while (fgets (buf, sizeof(buf), f) != NULL)
+ while (hnj_fgets (buf, sizeof(buf), f) != NULL)
{
if (buf[0] != '%')
{
@@ -368,7 +431,7 @@ for (k = 0; k == 0 || (k == 1 && nextlevel); k++) {
if (dict[k]->utf8) {
int pu = -1; /* unicode character position */
int ps = -1; /* unicode start position (original replindex) */
- int pc = (*word == '.') ? 1: 0; /* 8-bit character position */
+ size_t pc = (*word == '.') ? 1: 0; /* 8-bit character position */
for (; pc < (strlen(word) + 1); pc++) {
/* beginning of an UTF-8 character (not '10' start bits) */
if ((((unsigned char) word[pc]) >> 6) != 2) pu++;
@@ -461,7 +524,7 @@ for (k = 0; k == 0 || (k == 1 && nextlevel); k++) {
#endif
state_num = 0;
}
- fclose(f);
+ hnj_fclose(f);
if (k == 2) dict[0]->nextlevel = dict[1];
return dict[0];
}
diff --git a/third_party/hyphen/hyphen.gyp b/third_party/hyphen/hyphen.gyp
new file mode 100644
index 0000000..35becc4
--- /dev/null
+++ b/third_party/hyphen/hyphen.gyp
@@ -0,0 +1,32 @@
+# Copyright (c) 2012 The Chromium Authors. All rights reserved.
+# Use of this source code is governed by a BSD-style license that can be
+# found in the LICENSE file.
+
+{
+ 'targets': [
+ {
+ 'target_name': 'hyphen',
+ 'type': '<(library)',
+ 'include_dirs': [
+ '.',
+ ],
+ 'defines': [
+ 'HYPHEN_CHROME_CLIENT',
+ ],
+ 'sources': [
+ 'hnjalloc.c',
+ 'hnjalloc.h',
+ 'hyphen.h',
+ 'hyphen.c',
+ ],
+ 'direct_dependent_settings': {
+ 'defines': [
+ 'HYPHEN_CHROME_CLIENT',
+ ],
+ 'include_dirs': [
+ '.',
+ ],
+ },
+ },
+ ],
+}
diff --git a/third_party/hyphen/hyphen.h b/third_party/hyphen/hyphen.h
index 5d79308..b5517d3 100644
--- a/third_party/hyphen/hyphen.h
+++ b/third_party/hyphen/hyphen.h
@@ -90,7 +90,11 @@ struct _HyphenTrans {
int new_state;
};
+#ifdef HYPHEN_CHROME_CLIENT
+HyphenDict *hnj_hyphen_load (const unsigned char *data, size_t size);
+#else
HyphenDict *hnj_hyphen_load (const char *fn);
+#endif
void hnj_hyphen_free (HyphenDict *dict);
/* obsolete, use hnj_hyphen_hyphenate2() or *hyphenate3() functions) */