diff options
author | Bruno Haible <bruno@clisp.org> | 2008-05-18 13:24:51 +0000 |
---|---|---|
committer | Bruno Haible <bruno@clisp.org> | 2009-06-23 12:15:43 +0200 |
commit | 8a7987e481cedead8460231d96f942645e826365 (patch) | |
tree | a307c0b22c8a30855b626c7e0dd4bf66ff1a2139 /gnulib-local | |
parent | 75a8381828e38346a51c87c2aa498ff2df84dfae (diff) | |
download | external_gettext-8a7987e481cedead8460231d96f942645e826365.zip external_gettext-8a7987e481cedead8460231d96f942645e826365.tar.gz external_gettext-8a7987e481cedead8460231d96f942645e826365.tar.bz2 |
Improve test for containment of name in the translation.
Diffstat (limited to 'gnulib-local')
-rw-r--r-- | gnulib-local/ChangeLog | 17 | ||||
-rw-r--r-- | gnulib-local/lib/propername.c | 142 | ||||
-rw-r--r-- | gnulib-local/modules/propername | 7 |
3 files changed, 156 insertions, 10 deletions
diff --git a/gnulib-local/ChangeLog b/gnulib-local/ChangeLog index d8abc0f..499798c 100644 --- a/gnulib-local/ChangeLog +++ b/gnulib-local/ChangeLog @@ -1,3 +1,20 @@ +2008-05-18 Bruno Haible <bruno@clisp.org> + + * lib/propername.c: Include <stdbool.h>, <ctype.h>, trim.h, mbchar.h, + mbuiter.h. Don't include c-strstr.h. + (mbsstr_trimmed_wordbounded): New function. + (proper_name, proper_name_utf8): Use it instead of mbsstr or c_strstr. + * modules/propername (Depends-on): Add stdbool, trim, mbchar, mbuiter. + Remove c-strstr. + Reported by Paul Eggert <eggert@cs.ucla.edu>. + +2008-05-18 Bruno Haible <bruno@clisp.org> + + * modules/propername (Notice): New field. + (configure.ac): Invoke AM_GETTEXT_OPTION. + * lib/propername.h: Add more comments. + Reported by Ben Pfaff <blp@cs.stanford.edu>. + 2008-05-11 Bruno Haible <bruno@clisp.org> * lib/gen-lbrkprop.c: Move to gnulib as lib/unilbrk/gen-lbrk.c. diff --git a/gnulib-local/lib/propername.c b/gnulib-local/lib/propername.c index 38d1619..0d3681e 100644 --- a/gnulib-local/lib/propername.c +++ b/gnulib-local/lib/propername.c @@ -1,5 +1,5 @@ /* Localization of proper names. - Copyright (C) 2006-2007 Free Software Foundation, Inc. + Copyright (C) 2006-2008 Free Software Foundation, Inc. Written by Bruno Haible <bruno@clisp.org>, 2006. This program is free software: you can redistribute it and/or modify @@ -20,6 +20,8 @@ /* Specification. */ #include "propername.h" +#include <ctype.h> +#include <stdbool.h> #include <stdio.h> #include <stdlib.h> #include <string.h> @@ -27,14 +29,127 @@ # include <iconv.h> #endif +#include "trim.h" +#include "mbchar.h" +#if HAVE_MBRTOWC +# include "mbuiter.h" +#endif #include "localcharset.h" #include "c-strcase.h" #include "xstriconv.h" -#include "c-strstr.h" #include "xalloc.h" #include "gettext.h" +/* Tests whether STRING contains trim (SUB), starting and ending at word + boundaries. + Here, instead of implementing Unicode Standard Annex #29 for determining + word boundaries, we assume that trim (SUB) starts and ends with words and + only test whether the part before it ends with a non-word and the part + after it starts with a non-word. */ +static bool +mbsstr_trimmed_wordbounded (const char *string, const char *sub) +{ + char *tsub = trim (sub); + bool found = false; + + for (; *string != '\0';) + { + const char *tsub_in_string = mbsstr (string, tsub); + if (tsub_in_string == NULL) + break; + else + { +#if HAVE_MBRTOWC + if (MB_CUR_MAX > 1) + { + mbui_iterator_t string_iter; + bool word_boundary_before; + bool word_boundary_after; + + mbui_init (string_iter, string); + word_boundary_before = true; + if (mbui_cur_ptr (string_iter) < tsub_in_string) + { + mbchar_t last_char_before_tsub; + do + { + if (!mbui_avail (string_iter)) + abort (); + last_char_before_tsub = mbui_cur (string_iter); + mbui_advance (string_iter); + } + while (mbui_cur_ptr (string_iter) < tsub_in_string); + if (mb_isalnum (last_char_before_tsub)) + word_boundary_before = false; + } + + mbui_init (string_iter, tsub_in_string); + { + mbui_iterator_t tsub_iter; + + for (mbui_init (tsub_iter, tsub); + mbui_avail (tsub_iter); + mbui_advance (tsub_iter)) + { + if (!mbui_avail (string_iter)) + abort (); + mbui_advance (string_iter); + } + } + word_boundary_after = true; + if (mbui_avail (string_iter)) + { + mbchar_t first_char_after_tsub = mbui_cur (string_iter); + if (mb_isalnum (first_char_after_tsub)) + word_boundary_after = false; + } + + if (word_boundary_before && word_boundary_after) + { + found = true; + break; + } + + mbui_init (string_iter, tsub_in_string); + if (!mbui_avail (string_iter)) + break; + string = tsub_in_string + mb_len (mbui_cur (string_iter)); + } + else +#endif /* HAVE_MBRTOWC */ + { + bool word_boundary_before; + const char *p; + bool word_boundary_after; + + word_boundary_before = true; + if (string < tsub_in_string) + if (isalnum ((unsigned char) tsub_in_string[-1])) + word_boundary_before = false; + + p = tsub_in_string + strlen (tsub); + word_boundary_after = true; + if (*p != '\0') + if (isalnum ((unsigned char) *p)) + word_boundary_after = false; + + if (word_boundary_before && word_boundary_after) + { + found = true; + break; + } + + if (*tsub_in_string == '\0') + break; + string = tsub_in_string + 1; + } + } + } + free (tsub); + return found; +} + /* Return the localization of NAME. NAME is written in ASCII. */ const char * @@ -46,7 +161,7 @@ proper_name (const char *name) if (translation != name) { /* See whether the translation contains the original name. */ - if (mbsstr (translation, name) != NULL) + if (mbsstr_trimmed_wordbounded (translation, name)) return translation; else { @@ -116,13 +231,12 @@ proper_name_utf8 (const char *name_ascii, const char *name_utf8) if (translation != name_ascii) { - /* See whether the translation contains the original name. - The multibyte-aware mbsstr() is not absolutely necessary here. */ - if (c_strstr (translation, name_ascii) != NULL + /* See whether the translation contains the original name. */ + if (mbsstr_trimmed_wordbounded (translation, name_ascii) || (name_converted != NULL - && mbsstr (translation, name_converted) != NULL) + && mbsstr_trimmed_wordbounded (translation, name_converted)) || (name_converted_translit != NULL - && mbsstr (translation, name_converted_translit) != NULL)) + && mbsstr_trimmed_wordbounded (translation, name_converted_translit))) { if (alloc_name_converted != NULL) free (alloc_name_converted); @@ -155,3 +269,15 @@ proper_name_utf8 (const char *name_ascii, const char *name_utf8) return name; } } + +#ifdef TEST +# include <locale.h> +int +main (int argc, char *argv[]) +{ + setlocale (LC_ALL, ""); + if (mbsstr_trimmed_wordbounded (argv[1], argv[2])) + printf("found\n"); + return 0; +} +#endif diff --git a/gnulib-local/modules/propername b/gnulib-local/modules/propername index aef3dac..fb31901 100644 --- a/gnulib-local/modules/propername +++ b/gnulib-local/modules/propername @@ -12,12 +12,15 @@ lib/propername.h lib/propername.c Depends-on: +stdbool +trim +mbsstr +mbchar +mbuiter iconv localcharset c-strcase xstriconv -c-strstr -mbsstr xalloc gettext-h |