diff options
author | hbono@chromium.org <hbono@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2012-07-17 08:56:59 +0000 |
---|---|---|
committer | hbono@chromium.org <hbono@chromium.org@0039d316-1c4b-4281-b951-d872f2087c98> | 2012-07-17 08:56:59 +0000 |
commit | 48cf2472324304e12d7138f14841f4b2d566e39b (patch) | |
tree | a58e97dfbe2a70bafd3b27b7346ca7cee8780315 /third_party/hyphen | |
parent | 99c4c707b2cf4e0096991c47dcaf8ac57bc52eaa (diff) | |
download | chromium_src-48cf2472324304e12d7138f14841f4b2d566e39b.zip chromium_src-48cf2472324304e12d7138f14841f4b2d566e39b.tar.gz chromium_src-48cf2472324304e12d7138f14841f4b2d566e39b.tar.bz2 |
Adds a hy-phen-ator.
This change adds a project file for the hyphen library and a Hyphenator class that encapsulates the library. (The class is not yet integrated into Chrome.)
BUG=47083
TEST=HyphenatorTest.HyphenateWords
Review URL: https://chromiumcodereview.appspot.com/9545017
git-svn-id: svn://svn.chromium.org/chrome/trunk/src@146964 0039d316-1c4b-4281-b951-d872f2087c98
Diffstat (limited to 'third_party/hyphen')
-rw-r--r-- | third_party/hyphen/README.chromium | 9 | ||||
-rw-r--r-- | third_party/hyphen/google.patch | 148 | ||||
-rw-r--r-- | third_party/hyphen/hyph_en_US.dic | 2 | ||||
-rw-r--r-- | third_party/hyphen/hyphen.c | 75 | ||||
-rw-r--r-- | third_party/hyphen/hyphen.gyp | 32 | ||||
-rw-r--r-- | third_party/hyphen/hyphen.h | 4 |
6 files changed, 262 insertions, 8 deletions
diff --git a/third_party/hyphen/README.chromium b/third_party/hyphen/README.chromium index 4cbb02e..7cf556b 100644 --- a/third_party/hyphen/README.chromium +++ b/third_party/hyphen/README.chromium @@ -1,8 +1,15 @@ Name: hyphen URL: http://sourceforge.net/projects/hunspell/files/Hyphen/ Version: 2.6 +License File: COPYING +Security Critical: yes Description: -This is a partial copy of Hyphen 2.6. +This is a partial copy of Hyphen 2.6 with the following changes: +* Change the input params of hnj_hyphen_load to receive the pointer to a ruleset + instead of a file path. +* Change RIGHTHYPHENMIN to 2 in hyph_en_US.dic so it hyphenates rec-i-proc-i-ty + as expected. +The patch is in google.patch. See 'hyphen.tex' for additional requirements regarding that file.
\ No newline at end of file diff --git a/third_party/hyphen/google.patch b/third_party/hyphen/google.patch new file mode 100644 index 0000000..bca4d2f --- /dev/null +++ b/third_party/hyphen/google.patch @@ -0,0 +1,148 @@ +? google.patch +Index: hyphen.c +=================================================================== +RCS file: /cvsroot/hunspell/hyphen/hyphen.c,v +retrieving revision 1.4 +diff -u -r1.4 hyphen.c +--- hyphen.c 1 Dec 2010 01:30:20 -0000 1.4 ++++ hyphen.c 1 Mar 2012 05:18:32 -0000 +@@ -242,12 +242,71 @@ + } + #endif + ++#ifdef HYPHEN_CHROME_CLIENT ++typedef struct { ++ const unsigned char *data; ++ size_t offset; ++ size_t size; ++} hnj_file; ++ ++static hnj_file * ++hnj_fopen (const unsigned char *data, size_t size) ++{ ++ hnj_file *f; ++ ++ f = hnj_malloc (sizeof(hnj_file)); ++ if (f == NULL) ++ return NULL; ++ f->offset = 0; ++ f->data = data; ++ f->size = size; ++ return f; ++} ++ ++static void ++hnj_fclose (hnj_file *f) ++{ ++ hnj_free (f); ++} ++ ++static char * ++hnj_fgets (char *s, int size, hnj_file *f) ++{ ++ int i; ++ ++ if (f->offset >= f->size) ++ return NULL; ++ for (i = 0; i < size - 1; i++) { ++ char c; ++ ++ if (f->offset >= f->size) ++ break; ++ c = f->data[f->offset++]; ++ if (c == '\r' || c == '\n') ++ break; ++ s[i] = c; ++ } ++ s[i] = '\0'; ++ return s; ++} ++#else ++typedef FILE hnj_file; ++#define hnj_fopen(fn, mode) fopen((fn), (mode)) ++#define hnj_fclose(f) fclose(f) ++#define hnj_fgets(s, size, f) fgets((s), (size), (f)) ++#endif ++ ++#ifdef HYPHEN_CHROME_CLIENT ++HyphenDict * ++hnj_hyphen_load (const unsigned char *data, size_t size) ++#else + HyphenDict * + hnj_hyphen_load (const char *fn) ++#endif + { + HyphenDict *dict[2]; + HashTab *hashtab; +- FILE *f; ++ hnj_file *f; + char buf[MAX_CHARS]; + char word[MAX_CHARS]; + char pattern[MAX_CHARS]; +@@ -261,7 +320,11 @@ + HashEntry *e; + int nextlevel = 0; + ++#ifdef HYPHEN_CHROME_CLIENT ++ f = hnj_fopen (data, size); ++#else + f = fopen (fn, "r"); ++#endif + if (f == 
NULL) + return NULL; + +@@ -291,7 +354,7 @@ + /* read in character set info */ + if (k == 0) { + for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0; +- if (fgets(dict[k]->cset, sizeof(dict[k]->cset),f) != NULL) { ++ if (hnj_fgets(dict[k]->cset, sizeof(dict[k]->cset),f) != NULL) { + for (i=0;i<MAX_NAME;i++) + if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n')) + dict[k]->cset[i] = 0; +@@ -304,7 +367,7 @@ + dict[k]->utf8 = dict[0]->utf8; + } + +- while (fgets (buf, sizeof(buf), f) != NULL) ++ while (hnj_fgets (buf, sizeof(buf), f) != NULL) + { + if (buf[0] != '%') + { +@@ -385,7 +448,7 @@ + if (dict[k]->utf8) { + int pu = -1; /* unicode character position */ + int ps = -1; /* unicode start position (original replindex) */ +- int pc = (*word == '.') ? 1: 0; /* 8-bit character position */ ++ size_t pc = (*word == '.') ? 1: 0; /* 8-bit character position */ + for (; pc < (strlen(word) + 1); pc++) { + /* beginning of an UTF-8 character (not '10' start bits) */ + if ((((unsigned char) word[pc]) >> 6) != 2) pu++; +@@ -478,7 +541,7 @@ + #endif + state_num = 0; + } +- fclose(f); ++ hnj_fclose(f); + if (k == 2) dict[0]->nextlevel = dict[1]; + return dict[0]; + } +Index: hyphen.h +=================================================================== +RCS file: /cvsroot/hunspell/hyphen/hyphen.h,v +retrieving revision 1.2 +diff -u -r1.2 hyphen.h +--- hyphen.h 27 Nov 2010 02:20:33 -0000 1.2 ++++ hyphen.h 1 Mar 2012 05:18:33 -0000 +@@ -93,7 +93,11 @@ + int new_state; + }; + ++#ifdef HYPHEN_CHROME_CLIENT ++HyphenDict *hnj_hyphen_load (const unsigned char *data, size_t size); ++#else + HyphenDict *hnj_hyphen_load (const char *fn); ++#endif + void hnj_hyphen_free (HyphenDict *dict); + + /* obsolete, use hnj_hyphen_hyphenate2() or *hyphenate3() functions) */ diff --git a/third_party/hyphen/hyph_en_US.dic b/third_party/hyphen/hyph_en_US.dic index e38cbce..3baa02d 100644 --- a/third_party/hyphen/hyph_en_US.dic +++ b/third_party/hyphen/hyph_en_US.dic @@ -1,6 +1,6 @@ UTF-8 LEFTHYPHENMIN 
2 -RIGHTHYPHENMIN 3 +RIGHTHYPHENMIN 2 COMPOUNDLEFTHYPHENMIN 2 COMPOUNDRIGHTHYPHENMIN 3 1'. diff --git a/third_party/hyphen/hyphen.c b/third_party/hyphen/hyphen.c index 26fbefd..6b9cb78 100644 --- a/third_party/hyphen/hyphen.c +++ b/third_party/hyphen/hyphen.c @@ -242,12 +242,71 @@ get_state_str (int state) } #endif +#ifdef HYPHEN_CHROME_CLIENT +typedef struct { + const unsigned char *data; + size_t offset; + size_t size; +} hnj_file; + +static hnj_file * +hnj_fopen (const unsigned char *data, size_t size) +{ + hnj_file *f; + + f = hnj_malloc (sizeof(hnj_file)); + if (f == NULL) + return NULL; + f->offset = 0; + f->data = data; + f->size = size; + return f; +} + +static void +hnj_fclose (hnj_file *f) +{ + hnj_free (f); +} + +static char * +hnj_fgets (char *s, int size, hnj_file *f) +{ + int i; + + if (f->offset >= f->size) + return NULL; + for (i = 0; i < size - 1; i++) { + char c; + + if (f->offset >= f->size) + break; + c = f->data[f->offset++]; + if (c == '\r' || c == '\n') + break; + s[i] = c; + } + s[i] = '\0'; + return s; +} +#else +typedef FILE hnj_file; +#define hnj_fopen(fn, mode) fopen((fn), (mode)) +#define hnj_fclose(f) fclose(f) +#define hnj_fgets(s, size, f) fgets((s), (size), (f)) +#endif + +#ifdef HYPHEN_CHROME_CLIENT +HyphenDict * +hnj_hyphen_load (const unsigned char *data, size_t size) +#else HyphenDict * hnj_hyphen_load (const char *fn) +#endif { HyphenDict *dict[2]; HashTab *hashtab; - FILE *f; + hnj_file *f; char buf[MAX_CHARS]; char word[MAX_CHARS]; char pattern[MAX_CHARS]; @@ -261,7 +320,11 @@ hnj_hyphen_load (const char *fn) HashEntry *e; int nextlevel = 0; - f = fopen (fn, "r"); +#ifdef HYPHEN_CHROME_CLIENT + f = hnj_fopen (data, size); +#else + f = hnj_fopen (fn, "r"); +#endif if (f == NULL) return NULL; @@ -289,7 +352,7 @@ for (k = 0; k == 0 || (k == 1 && nextlevel); k++) { /* read in character set info */ if (k == 0) { for (i=0;i<MAX_NAME;i++) dict[k]->cset[i]= 0; - if (fgets(dict[k]->cset, sizeof(dict[k]->cset),f) != NULL) { + if 
(hnj_fgets(dict[k]->cset, sizeof(dict[k]->cset),f) != NULL) { for (i=0;i<MAX_NAME;i++) if ((dict[k]->cset[i] == '\r') || (dict[k]->cset[i] == '\n')) dict[k]->cset[i] = 0; @@ -302,7 +365,7 @@ for (k = 0; k == 0 || (k == 1 && nextlevel); k++) { dict[k]->utf8 = dict[0]->utf8; } - while (fgets (buf, sizeof(buf), f) != NULL) + while (hnj_fgets (buf, sizeof(buf), f) != NULL) { if (buf[0] != '%') { @@ -368,7 +431,7 @@ for (k = 0; k == 0 || (k == 1 && nextlevel); k++) { if (dict[k]->utf8) { int pu = -1; /* unicode character position */ int ps = -1; /* unicode start position (original replindex) */ - int pc = (*word == '.') ? 1: 0; /* 8-bit character position */ + size_t pc = (*word == '.') ? 1: 0; /* 8-bit character position */ for (; pc < (strlen(word) + 1); pc++) { /* beginning of an UTF-8 character (not '10' start bits) */ if ((((unsigned char) word[pc]) >> 6) != 2) pu++; @@ -461,7 +524,7 @@ for (k = 0; k == 0 || (k == 1 && nextlevel); k++) { #endif state_num = 0; } - fclose(f); + hnj_fclose(f); if (k == 2) dict[0]->nextlevel = dict[1]; return dict[0]; } diff --git a/third_party/hyphen/hyphen.gyp b/third_party/hyphen/hyphen.gyp new file mode 100644 index 0000000..35becc4 --- /dev/null +++ b/third_party/hyphen/hyphen.gyp @@ -0,0 +1,32 @@ +# Copyright (c) 2012 The Chromium Authors. All rights reserved. +# Use of this source code is governed by a BSD-style license that can be +# found in the LICENSE file. 
+ +{ + 'targets': [ + { + 'target_name': 'hyphen', + 'type': '<(library)', + 'include_dirs': [ + '.', + ], + 'defines': [ + 'HYPHEN_CHROME_CLIENT', + ], + 'sources': [ + 'hnjalloc.c', + 'hnjalloc.h', + 'hyphen.h', + 'hyphen.c', + ], + 'direct_dependent_settings': { + 'defines': [ + 'HYPHEN_CHROME_CLIENT', + ], + 'include_dirs': [ + '.', + ], + }, + }, + ], +} diff --git a/third_party/hyphen/hyphen.h b/third_party/hyphen/hyphen.h index 5d79308..b5517d3 100644 --- a/third_party/hyphen/hyphen.h +++ b/third_party/hyphen/hyphen.h @@ -90,7 +90,11 @@ struct _HyphenTrans { int new_state; }; +#ifdef HYPHEN_CHROME_CLIENT +HyphenDict *hnj_hyphen_load (const unsigned char *data, size_t size); +#else HyphenDict *hnj_hyphen_load (const char *fn); +#endif void hnj_hyphen_free (HyphenDict *dict); /* obsolete, use hnj_hyphen_hyphenate2() or *hyphenate3() functions) */ |