diff options
Diffstat (limited to 'third_party/sqlite/src/tool/mkkeywordhash.c')
-rw-r--r-- | third_party/sqlite/src/tool/mkkeywordhash.c | 595 |
1 files changed, 595 insertions, 0 deletions
diff --git a/third_party/sqlite/src/tool/mkkeywordhash.c b/third_party/sqlite/src/tool/mkkeywordhash.c new file mode 100644 index 0000000..58e10c8 --- /dev/null +++ b/third_party/sqlite/src/tool/mkkeywordhash.c @@ -0,0 +1,595 @@ +/* +** Compile and run this standalone program in order to generate code that +** implements a function that will translate alphabetic identifiers into +** parser token codes. +*/ +#include <stdio.h> +#include <string.h> +#include <stdlib.h> +#include <assert.h> + +/* +** A header comment placed at the beginning of generated code. +*/ +static const char zHdr[] = + "/***** This file contains automatically generated code ******\n" + "**\n" + "** The code in this file has been automatically generated by\n" + "**\n" + "** $Header: /home/drh/sqlite/trans/cvs/sqlite/sqlite/tool/mkkeywordhash.c,v 1.38 2009/06/09 14:27:41 drh Exp $\n" + "**\n" + "** The code in this file implements a function that determines whether\n" + "** or not a given identifier is really an SQL keyword. The same thing\n" + "** might be implemented more directly using a hand-written hash table.\n" + "** But by using this automatically generated code, the size of the code\n" + "** is substantially reduced. This is important for embedded applications\n" + "** on platforms with limited memory.\n" + "*/\n" +; + +/* +** All the keywords of the SQL language are stored in a hash +** table composed of instances of the following structure. +*/ +typedef struct Keyword Keyword; +struct Keyword { + char *zName; /* The keyword name */ + char *zTokenType; /* Token value for this keyword */ + int mask; /* Code this keyword if non-zero */ + int id; /* Unique ID for this record */ + int hash; /* Hash on the keyword */ + int offset; /* Offset to start of name string */ + int len; /* Length of this keyword, not counting final \000 */ + int prefix; /* Number of characters in prefix */ + int longestSuffix; /* Longest suffix that is a prefix on another word */ + int iNext; /* Index in aKeywordTable[] of next with same hash */ + int substrId; /* Id to another keyword this keyword is embedded in */ + int substrOffset; /* Offset into substrId for start of this keyword */ + char zOrigName[20]; /* Original keyword name before processing */ +}; + +/* +** Define masks used to determine which keywords are allowed +*/ +#ifdef SQLITE_OMIT_ALTERTABLE +# define ALTER 0 +#else +# define ALTER 0x00000001 +#endif +#define ALWAYS 0x00000002 +#ifdef SQLITE_OMIT_ANALYZE +# define ANALYZE 0 +#else +# define ANALYZE 0x00000004 +#endif +#ifdef SQLITE_OMIT_ATTACH +# define ATTACH 0 +#else +# define ATTACH 0x00000008 +#endif +#ifdef SQLITE_OMIT_AUTOINCREMENT +# define AUTOINCR 0 +#else +# define AUTOINCR 0x00000010 +#endif +#ifdef SQLITE_OMIT_CAST +# define CAST 0 +#else +# define CAST 0x00000020 +#endif +#ifdef SQLITE_OMIT_COMPOUND_SELECT +# define COMPOUND 0 +#else +# define COMPOUND 0x00000040 +#endif +#ifdef SQLITE_OMIT_CONFLICT_CLAUSE +# define CONFLICT 0 +#else +# define CONFLICT 0x00000080 +#endif +#ifdef SQLITE_OMIT_EXPLAIN +# define EXPLAIN 0 +#else +# define EXPLAIN 0x00000100 +#endif +#ifdef SQLITE_OMIT_FOREIGN_KEY +# define FKEY 0 +#else +# define FKEY 0x00000200 +#endif +#ifdef SQLITE_OMIT_PRAGMA +# define PRAGMA 0 +#else +# define PRAGMA 0x00000400 +#endif +#ifdef SQLITE_OMIT_REINDEX +# define REINDEX 0 +#else +# define REINDEX 0x00000800 +#endif +#ifdef SQLITE_OMIT_SUBQUERY +# define SUBQUERY 0 +#else +# define SUBQUERY 0x00001000 +#endif +#ifdef SQLITE_OMIT_TRIGGER +# define TRIGGER 0 +#else +# define TRIGGER 0x00002000 +#endif +#if defined(SQLITE_OMIT_AUTOVACUUM) && \ + (defined(SQLITE_OMIT_VACUUM) || defined(SQLITE_OMIT_ATTACH)) +# define VACUUM 0 +#else +# define VACUUM 0x00004000 +#endif +#ifdef SQLITE_OMIT_VIEW +# define VIEW 0 +#else +# define VIEW 0x00008000 +#endif +#ifdef SQLITE_OMIT_VIRTUALTABLE +# define VTAB 0 +#else +# define VTAB 0x00010000 +#endif +#ifdef SQLITE_OMIT_AUTOVACUUM +# define AUTOVACUUM 0 +#else +# define AUTOVACUUM 0x00020000 +#endif + +/* +** These are the keywords +*/ +static Keyword aKeywordTable[] = { + { "ABORT", "TK_ABORT", CONFLICT|TRIGGER }, + { "ADD", "TK_ADD", ALTER }, + { "AFTER", "TK_AFTER", TRIGGER }, + { "ALL", "TK_ALL", ALWAYS }, + { "ALTER", "TK_ALTER", ALTER }, + { "ANALYZE", "TK_ANALYZE", ANALYZE }, + { "AND", "TK_AND", ALWAYS }, + { "AS", "TK_AS", ALWAYS }, + { "ASC", "TK_ASC", ALWAYS }, + { "ATTACH", "TK_ATTACH", ATTACH }, + { "AUTOINCREMENT", "TK_AUTOINCR", AUTOINCR }, + { "BEFORE", "TK_BEFORE", TRIGGER }, + { "BEGIN", "TK_BEGIN", ALWAYS }, + { "BETWEEN", "TK_BETWEEN", ALWAYS }, + { "BY", "TK_BY", ALWAYS }, + { "CASCADE", "TK_CASCADE", FKEY }, + { "CASE", "TK_CASE", ALWAYS }, + { "CAST", "TK_CAST", CAST }, + { "CHECK", "TK_CHECK", ALWAYS }, + { "COLLATE", "TK_COLLATE", ALWAYS }, + { "COLUMN", "TK_COLUMNKW", ALTER }, + { "COMMIT", "TK_COMMIT", ALWAYS }, + { "CONFLICT", "TK_CONFLICT", CONFLICT }, + { "CONSTRAINT", "TK_CONSTRAINT", ALWAYS }, + { "CREATE", "TK_CREATE", ALWAYS }, + { "CROSS", "TK_JOIN_KW", ALWAYS }, + { "CURRENT_DATE", "TK_CTIME_KW", ALWAYS }, + { "CURRENT_TIME", "TK_CTIME_KW", ALWAYS }, + { "CURRENT_TIMESTAMP","TK_CTIME_KW", ALWAYS }, + { "DATABASE", "TK_DATABASE", ATTACH }, + { "DEFAULT", "TK_DEFAULT", ALWAYS }, + { "DEFERRED", "TK_DEFERRED", ALWAYS }, + { "DEFERRABLE", "TK_DEFERRABLE", FKEY }, + { "DELETE", "TK_DELETE", ALWAYS }, + { "DESC", "TK_DESC", ALWAYS }, + { "DETACH", "TK_DETACH", ATTACH }, + { "DISTINCT", "TK_DISTINCT", ALWAYS }, + { "DROP", "TK_DROP", ALWAYS }, + { "END", "TK_END", ALWAYS }, + { "EACH", "TK_EACH", TRIGGER }, + { "ELSE", "TK_ELSE", ALWAYS }, + { "ESCAPE", "TK_ESCAPE", ALWAYS }, + { "EXCEPT", "TK_EXCEPT", COMPOUND }, + { "EXCLUSIVE", "TK_EXCLUSIVE", ALWAYS }, + { "EXISTS", "TK_EXISTS", ALWAYS }, + { "EXPLAIN", "TK_EXPLAIN", EXPLAIN }, + { "FAIL", "TK_FAIL", CONFLICT|TRIGGER }, + { "FOR", "TK_FOR", TRIGGER }, + { "FOREIGN", "TK_FOREIGN", FKEY }, + { "FROM", "TK_FROM", ALWAYS }, + { "FULL", "TK_JOIN_KW", ALWAYS }, + { "GLOB", "TK_LIKE_KW", ALWAYS }, + { "GROUP", "TK_GROUP", ALWAYS }, + { "HAVING", "TK_HAVING", ALWAYS }, + { "IF", "TK_IF", ALWAYS }, + { "IGNORE", "TK_IGNORE", CONFLICT|TRIGGER }, + { "IMMEDIATE", "TK_IMMEDIATE", ALWAYS }, + { "IN", "TK_IN", ALWAYS }, + { "INDEX", "TK_INDEX", ALWAYS }, + { "INDEXED", "TK_INDEXED", ALWAYS }, + { "INITIALLY", "TK_INITIALLY", FKEY }, + { "INNER", "TK_JOIN_KW", ALWAYS }, + { "INSERT", "TK_INSERT", ALWAYS }, + { "INSTEAD", "TK_INSTEAD", TRIGGER }, + { "INTERSECT", "TK_INTERSECT", COMPOUND }, + { "INTO", "TK_INTO", ALWAYS }, + { "IS", "TK_IS", ALWAYS }, + { "ISNULL", "TK_ISNULL", ALWAYS }, + { "JOIN", "TK_JOIN", ALWAYS }, + { "KEY", "TK_KEY", ALWAYS }, + { "LEFT", "TK_JOIN_KW", ALWAYS }, + { "LIKE", "TK_LIKE_KW", ALWAYS }, + { "LIMIT", "TK_LIMIT", ALWAYS }, + { "MATCH", "TK_MATCH", ALWAYS }, + { "NATURAL", "TK_JOIN_KW", ALWAYS }, + { "NOT", "TK_NOT", ALWAYS }, + { "NOTNULL", "TK_NOTNULL", ALWAYS }, + { "NULL", "TK_NULL", ALWAYS }, + { "OF", "TK_OF", ALWAYS }, + { "OFFSET", "TK_OFFSET", ALWAYS }, + { "ON", "TK_ON", ALWAYS }, + { "OR", "TK_OR", ALWAYS }, + { "ORDER", "TK_ORDER", ALWAYS }, + { "OUTER", "TK_JOIN_KW", ALWAYS }, + { "PLAN", "TK_PLAN", EXPLAIN }, + { "PRAGMA", "TK_PRAGMA", PRAGMA }, + { "PRIMARY", "TK_PRIMARY", ALWAYS }, + { "QUERY", "TK_QUERY", EXPLAIN }, + { "RAISE", "TK_RAISE", TRIGGER }, + { "REFERENCES", "TK_REFERENCES", FKEY }, + { "REGEXP", "TK_LIKE_KW", ALWAYS }, + { "REINDEX", "TK_REINDEX", REINDEX }, + { "RELEASE", "TK_RELEASE", ALWAYS }, + { "RENAME", "TK_RENAME", ALTER }, + { "REPLACE", "TK_REPLACE", CONFLICT }, + { "RESTRICT", "TK_RESTRICT", FKEY }, + { "RIGHT", "TK_JOIN_KW", ALWAYS }, + { "ROLLBACK", "TK_ROLLBACK", ALWAYS }, + { "ROW", "TK_ROW", TRIGGER }, + { "SAVEPOINT", "TK_SAVEPOINT", ALWAYS }, + { "SELECT", "TK_SELECT", ALWAYS }, + { "SET", "TK_SET", ALWAYS }, + { "TABLE", "TK_TABLE", ALWAYS }, + { "TEMP", "TK_TEMP", ALWAYS }, + { "TEMPORARY", "TK_TEMP", ALWAYS }, + { "THEN", "TK_THEN", ALWAYS }, + { "TO", "TK_TO", ALWAYS }, + { "TRANSACTION", "TK_TRANSACTION", ALWAYS }, + { "TRIGGER", "TK_TRIGGER", TRIGGER }, + { "UNION", "TK_UNION", COMPOUND }, + { "UNIQUE", "TK_UNIQUE", ALWAYS }, + { "UPDATE", "TK_UPDATE", ALWAYS }, + { "USING", "TK_USING", ALWAYS }, + { "VACUUM", "TK_VACUUM", VACUUM }, + { "VALUES", "TK_VALUES", ALWAYS }, + { "VIEW", "TK_VIEW", VIEW }, + { "VIRTUAL", "TK_VIRTUAL", VTAB }, + { "WHEN", "TK_WHEN", ALWAYS }, + { "WHERE", "TK_WHERE", ALWAYS }, +}; + +/* Number of keywords */ +static int nKeyword = (sizeof(aKeywordTable)/sizeof(aKeywordTable[0])); + +/* An array to map all upper-case characters into their corresponding +** lower-case character. +*/ +const unsigned char sqlite3UpperToLower[] = { + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, + 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, + 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, + 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 97, 98, 99,100,101,102,103, + 104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121, + 122, 91, 92, 93, 94, 95, 96, 97, 98, 99,100,101,102,103,104,105,106,107, + 108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125, + 126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143, + 144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161, + 162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179, + 180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197, + 198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215, + 216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233, + 234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251, + 252,253,254,255 +}; +#define UpperToLower sqlite3UpperToLower + +/* +** Comparision function for two Keyword records +*/ +static int keywordCompare1(const void *a, const void *b){ + const Keyword *pA = (Keyword*)a; + const Keyword *pB = (Keyword*)b; + int n = pA->len - pB->len; + if( n==0 ){ + n = strcmp(pA->zName, pB->zName); + } + return n; +} +static int keywordCompare2(const void *a, const void *b){ + const Keyword *pA = (Keyword*)a; + const Keyword *pB = (Keyword*)b; + int n = pB->longestSuffix - pA->longestSuffix; + if( n==0 ){ + n = strcmp(pA->zName, pB->zName); + } + return n; +} +static int keywordCompare3(const void *a, const void *b){ + const Keyword *pA = (Keyword*)a; + const Keyword *pB = (Keyword*)b; + int n = pA->offset - pB->offset; + return n; +} + +/* +** Return a KeywordTable entry with the given id +*/ +static Keyword *findById(int id){ + int i; + for(i=0; i<nKeyword; i++){ + if( aKeywordTable[i].id==id ) break; + } + return &aKeywordTable[i]; +} + +/* +** This routine does the work. The generated code is printed on standard +** output. +*/ +int main(int argc, char **argv){ + int i, j, k, h; + int bestSize, bestCount; + int count; + int nChar; + int totalLen = 0; + int aHash[1000]; /* 1000 is much bigger than nKeyword */ + char zText[2000]; + + /* Remove entries from the list of keywords that have mask==0 */ + for(i=j=0; i<nKeyword; i++){ + if( aKeywordTable[i].mask==0 ) continue; + if( j<i ){ + aKeywordTable[j] = aKeywordTable[i]; + } + j++; + } + nKeyword = j; + + /* Fill in the lengths of strings and hashes for all entries. */ + for(i=0; i<nKeyword; i++){ + Keyword *p = &aKeywordTable[i]; + p->len = strlen(p->zName); + assert( p->len<sizeof(p->zOrigName) ); + strcpy(p->zOrigName, p->zName); + totalLen += p->len; + p->hash = (UpperToLower[(int)p->zName[0]]*4) ^ + (UpperToLower[(int)p->zName[p->len-1]]*3) ^ p->len; + p->id = i+1; + } + + /* Sort the table from shortest to longest keyword */ + qsort(aKeywordTable, nKeyword, sizeof(aKeywordTable[0]), keywordCompare1); + + /* Look for short keywords embedded in longer keywords */ + for(i=nKeyword-2; i>=0; i--){ + Keyword *p = &aKeywordTable[i]; + for(j=nKeyword-1; j>i && p->substrId==0; j--){ + Keyword *pOther = &aKeywordTable[j]; + if( pOther->substrId ) continue; + if( pOther->len<=p->len ) continue; + for(k=0; k<=pOther->len-p->len; k++){ + if( memcmp(p->zName, &pOther->zName[k], p->len)==0 ){ + p->substrId = pOther->id; + p->substrOffset = k; + break; + } + } + } + } + + /* Compute the longestSuffix value for every word */ + for(i=0; i<nKeyword; i++){ + Keyword *p = &aKeywordTable[i]; + if( p->substrId ) continue; + for(j=0; j<nKeyword; j++){ + Keyword *pOther; + if( j==i ) continue; + pOther = &aKeywordTable[j]; + if( pOther->substrId ) continue; + for(k=p->longestSuffix+1; k<p->len && k<pOther->len; k++){ + if( memcmp(&p->zName[p->len-k], pOther->zName, k)==0 ){ + p->longestSuffix = k; + } + } + } + } + + /* Sort the table into reverse order by length */ + qsort(aKeywordTable, nKeyword, sizeof(aKeywordTable[0]), keywordCompare2); + + /* Fill in the offset for all entries */ + nChar = 0; + for(i=0; i<nKeyword; i++){ + Keyword *p = &aKeywordTable[i]; + if( p->offset>0 || p->substrId ) continue; + p->offset = nChar; + nChar += p->len; + for(k=p->len-1; k>=1; k--){ + for(j=i+1; j<nKeyword; j++){ + Keyword *pOther = &aKeywordTable[j]; + if( pOther->offset>0 || pOther->substrId ) continue; + if( pOther->len<=k ) continue; + if( memcmp(&p->zName[p->len-k], pOther->zName, k)==0 ){ + p = pOther; + p->offset = nChar - k; + nChar = p->offset + p->len; + p->zName += k; + p->len -= k; + p->prefix = k; + j = i; + k = p->len; + } + } + } + } + for(i=0; i<nKeyword; i++){ + Keyword *p = &aKeywordTable[i]; + if( p->substrId ){ + p->offset = findById(p->substrId)->offset + p->substrOffset; + } + } + + /* Sort the table by offset */ + qsort(aKeywordTable, nKeyword, sizeof(aKeywordTable[0]), keywordCompare3); + + /* Figure out how big to make the hash table in order to minimize the + ** number of collisions */ + bestSize = nKeyword; + bestCount = nKeyword*nKeyword; + for(i=nKeyword/2; i<=2*nKeyword; i++){ + for(j=0; j<i; j++) aHash[j] = 0; + for(j=0; j<nKeyword; j++){ + h = aKeywordTable[j].hash % i; + aHash[h] *= 2; + aHash[h]++; + } + for(j=count=0; j<i; j++) count += aHash[j]; + if( count<bestCount ){ + bestCount = count; + bestSize = i; + } + } + + /* Compute the hash */ + for(i=0; i<bestSize; i++) aHash[i] = 0; + for(i=0; i<nKeyword; i++){ + h = aKeywordTable[i].hash % bestSize; + aKeywordTable[i].iNext = aHash[h]; + aHash[h] = i+1; + } + + /* Begin generating code */ + printf("%s", zHdr); + printf("/* Hash score: %d */\n", bestCount); + printf("static int keywordCode(const char *z, int n){\n"); + printf(" /* zText[] encodes %d bytes of keywords in %d bytes */\n", + totalLen + nKeyword, nChar+1 ); + for(i=j=k=0; i<nKeyword; i++){ + Keyword *p = &aKeywordTable[i]; + if( p->substrId ) continue; + memcpy(&zText[k], p->zName, p->len); + k += p->len; + if( j+p->len>70 ){ + printf("%*s */\n", 74-j, ""); + j = 0; + } + if( j==0 ){ + printf(" /* "); + j = 8; + } + printf("%s", p->zName); + j += p->len; + } + if( j>0 ){ + printf("%*s */\n", 74-j, ""); + } + printf(" static const char zText[%d] = {\n", nChar); + zText[nChar] = 0; + for(i=j=0; i<k; i++){ + if( j==0 ){ + printf(" "); + } + if( zText[i]==0 ){ + printf("0"); + }else{ + printf("'%c',", zText[i]); + } + j += 4; + if( j>68 ){ + printf("\n"); + j = 0; + } + } + if( j>0 ) printf("\n"); + printf(" };\n"); + + printf(" static const unsigned char aHash[%d] = {\n", bestSize); + for(i=j=0; i<bestSize; i++){ + if( j==0 ) printf(" "); + printf(" %3d,", aHash[i]); + j++; + if( j>12 ){ + printf("\n"); + j = 0; + } + } + printf("%s };\n", j==0 ? "" : "\n"); + + printf(" static const unsigned char aNext[%d] = {\n", nKeyword); + for(i=j=0; i<nKeyword; i++){ + if( j==0 ) printf(" "); + printf(" %3d,", aKeywordTable[i].iNext); + j++; + if( j>12 ){ + printf("\n"); + j = 0; + } + } + printf("%s };\n", j==0 ? "" : "\n"); + + printf(" static const unsigned char aLen[%d] = {\n", nKeyword); + for(i=j=0; i<nKeyword; i++){ + if( j==0 ) printf(" "); + printf(" %3d,", aKeywordTable[i].len+aKeywordTable[i].prefix); + j++; + if( j>12 ){ + printf("\n"); + j = 0; + } + } + printf("%s };\n", j==0 ? "" : "\n"); + + printf(" static const unsigned short int aOffset[%d] = {\n", nKeyword); + for(i=j=0; i<nKeyword; i++){ + if( j==0 ) printf(" "); + printf(" %3d,", aKeywordTable[i].offset); + j++; + if( j>12 ){ + printf("\n"); + j = 0; + } + } + printf("%s };\n", j==0 ? "" : "\n"); + + printf(" static const unsigned char aCode[%d] = {\n", nKeyword); + for(i=j=0; i<nKeyword; i++){ + char *zToken = aKeywordTable[i].zTokenType; + if( j==0 ) printf(" "); + printf("%s,%*s", zToken, (int)(14-strlen(zToken)), ""); + j++; + if( j>=5 ){ + printf("\n"); + j = 0; + } + } + printf("%s };\n", j==0 ? "" : "\n"); + + printf(" int h, i;\n"); + printf(" if( n<2 ) return TK_ID;\n"); + printf(" h = ((charMap(z[0])*4) ^\n" + " (charMap(z[n-1])*3) ^\n" + " n) %% %d;\n", bestSize); + printf(" for(i=((int)aHash[h])-1; i>=0; i=((int)aNext[i])-1){\n"); + printf(" if( aLen[i]==n &&" + " sqlite3StrNICmp(&zText[aOffset[i]],z,n)==0 ){\n"); + for(i=0; i<nKeyword; i++){ + printf(" testcase( i==%d ); /* %s */\n", + i, aKeywordTable[i].zOrigName); + } + printf(" return aCode[i];\n"); + printf(" }\n"); + printf(" }\n"); + printf(" return TK_ID;\n"); + printf("}\n"); + printf("int sqlite3KeywordCode(const unsigned char *z, int n){\n"); + printf(" return keywordCode((char*)z, n);\n"); + printf("}\n"); + + return 0; +} |