This patch removes the usage of tolower() in fts code, which is not locale neutral and causes problem in some locales such as Turkish. See http://crbug.com/15261 for details. An upstream ticket was also created for this issue: http://www.sqlite.org/src/tktview/991789d9f3136a0460dc83a33e815c1aa9757c26 Contains backport for upstream http://www.sqlite.org/src/ci/b8b465ed2c. Index: ext/fts3/fts3.c =================================================================== --- ext/fts3/fts3.c 2009-09-04 13:37:41.000000000 -0700 +++ ext/fts3/fts3.c 2009-09-14 18:17:45.000000000 -0700 @@ -326,7 +326,7 @@ return (c&0x80)==0 ? isspace(c) : 0; } static int safe_tolower(char c){ - return (c&0x80)==0 ? tolower(c) : c; + return (c>='A' && c<='Z') ? (c-'A'+'a') : c; } static int safe_isalnum(char c){ return (c&0x80)==0 ? isalnum(c) : 0; Index: ext/fts3/fts3_expr.c =================================================================== --- ext/fts3/fts3_expr.c +++ ext/fts3/fts3_expr.c @@ -58,7 +58,6 @@ int sqlite3_fts3_enable_parentheses = 0; #include "fts3_expr.h" #include "sqlite3.h" -#include #include #include @@ -84,7 +83,7 @@ struct ParseContext { ** negative values). */ static int fts3isspace(char c){ - return (c&0x80)==0 ? isspace(c) : 0; + return c==' ' || c=='\t' || c=='\n' || c=='\r' || c=='\v' || c=='\f'; } /* Index: ext/fts3/fts3_porter.c =================================================================== --- ext/fts3/fts3_porter.c +++ ext/fts3/fts3_porter.c @@ -29,7 +29,6 @@ #include #include #include -#include #include "fts3_tokenizer.h" Index: ext/fts3/fts3_tokenizer1.c =================================================================== --- ext/fts3/fts3_tokenizer1.c +++ ext/fts3/fts3_tokenizer1.c @@ -29,7 +29,6 @@ #include #include #include -#include #include "fts3_tokenizer.h" @@ -55,6 +54,9 @@ static const sqlite3_tokenizer_module simpleTokenizerModule; static int simpleDelim(simple_tokenizer *t, unsigned char c){ return c<0x80 && t->delim[c]; } +static int fts3_isalnum(int x){ + return (x>='0' && x<='9') || (x>='A' && x<='Z') || (x>='a' && x<='z'); +} /* ** Create a new tokenizer instance. @@ -89,7 +91,7 @@ static int simpleCreate( /* Mark non-alphanumeric ASCII characters as delimiters */ int i; for(i=1; i<0x80; i++){ - t->delim[i] = !isalnum(i); + t->delim[i] = !fts3_isalnum(i); } } @@ -191,7 +193,7 @@ static int simpleNext( ** case-insensitivity. */ unsigned char ch = p[iStartOffset+i]; - c->pToken[i] = ch<0x80 ? tolower(ch) : ch; + c->pToken[i] = (ch>='A' && ch<='Z') ? ch-'A'+'a' : ch; } *ppToken = c->pToken; *pnBytes = n; Index: ext/fts1/simple_tokenizer.c =================================================================== --- ext/fts1/simple_tokenizer.c 2009-09-03 13:32:06.000000000 -0700 +++ ext/fts1/simple_tokenizer.c 2009-09-02 11:40:21.000000000 -0700 @@ -138,7 +138,7 @@ ** case-insensitivity. */ char ch = c->pCurrent[ii]; - c->zToken[ii] = (unsigned char)ch<0x80 ? tolower(ch) : ch; + c->zToken[ii] = ((ch>='A' && ch<='Z') ? (ch-'A'+'a') : ch); } c->zToken[n] = '\0'; *ppToken = c->zToken; Index: ext/fts1/fts1_tokenizer1.c =================================================================== --- ext/fts1/fts1_tokenizer1.c 2009-09-03 13:32:06.000000000 -0700 +++ ext/fts1/fts1_tokenizer1.c 2009-09-02 11:40:21.000000000 -0700 @@ -182,7 +182,7 @@ ** case-insensitivity. */ unsigned char ch = p[iStartOffset+i]; - c->pToken[i] = ch<0x80 ? tolower(ch) : ch; + c->pToken[i] = (ch>='A' && ch<='Z') ? (ch-'A'+'a') : ch; } *ppToken = c->pToken; *pnBytes = n; Index: ext/fts1/fts1.c =================================================================== --- ext/fts1/fts1.c 2009-09-04 13:37:41.000000000 -0700 +++ ext/fts1/fts1.c 2009-09-14 18:16:55.000000000 -0700 @@ -208,7 +208,7 @@ return (c&0x80)==0 ? isspace(c) : 0; } static int safe_tolower(char c){ - return (c&0x80)==0 ? tolower(c) : c; + return (c>='A' && c<='Z') ? (c-'A'+'a') : c; } static int safe_isalnum(char c){ return (c&0x80)==0 ? isalnum(c) : 0; Index: ext/fts2/fts2.c =================================================================== --- ext/fts2/fts2.c 2009-09-04 13:37:41.000000000 -0700 +++ ext/fts2/fts2.c 2009-09-14 18:17:02.000000000 -0700 @@ -372,7 +372,7 @@ return (c&0x80)==0 ? isspace(c) : 0; } static int safe_tolower(char c){ - return (c&0x80)==0 ? tolower(c) : c; + return (c>='A' && c<='Z') ? (c-'A'+'a') : c; } static int safe_isalnum(char c){ return (c&0x80)==0 ? isalnum(c) : 0; Index: ext/fts2/fts2_tokenizer1.c =================================================================== --- ext/fts2/fts2_tokenizer1.c 2009-09-03 13:32:06.000000000 -0700 +++ ext/fts2/fts2_tokenizer1.c 2009-09-02 11:40:21.000000000 -0700 @@ -191,7 +191,7 @@ ** case-insensitivity. */ unsigned char ch = p[iStartOffset+i]; - c->pToken[i] = ch<0x80 ? tolower(ch) : ch; + c->pToken[i] = (ch>='A' && ch<='Z') ? (ch-'A'+'a') : ch; } *ppToken = c->pToken; *pnBytes = n;