diff options
author | Bruno Haible <bruno@clisp.org> | 2005-10-07 11:21:50 +0000 |
---|---|---|
committer | Bruno Haible <bruno@clisp.org> | 2009-06-23 12:12:53 +0200 |
commit | 135b998c2230a94a19e4d66f3da1ea1a68f0728e (patch) | |
tree | 62d71a4fa6b867a9ffa19966e1a403f009a1d193 /gettext-tools/lib | |
parent | f1f805bcf63cd5073235be63957b064899427516 (diff) | |
download | external_gettext-135b998c2230a94a19e4d66f3da1ea1a68f0728e.zip external_gettext-135b998c2230a94a19e4d66f3da1ea1a68f0728e.tar.gz external_gettext-135b998c2230a94a19e4d66f3da1ea1a68f0728e.tar.bz2 |
Revamp the hash table API.
Diffstat (limited to 'gettext-tools/lib')
-rw-r--r-- | gettext-tools/lib/hash.c | 320 | ||||
-rw-r--r-- | gettext-tools/lib/hash.h | 60 |
2 files changed, 229 insertions, 151 deletions
diff --git a/gettext-tools/lib/hash.c b/gettext-tools/lib/hash.c index bf3d0bb..2a96210 100644 --- a/gettext-tools/lib/hash.c +++ b/gettext-tools/lib/hash.c @@ -29,37 +29,67 @@ #include <limits.h> #include <sys/types.h> -#include <obstack.h> +/* Since this simple implementation of hash tables allows only insertion, no + removal of entries, the right data structure for the memory holding all keys + is an obstack. */ +#include "obstack.h" +/* Use checked memory allocation. */ #include "xalloc.h" #define obstack_chunk_alloc xmalloc #define obstack_chunk_free free + typedef struct hash_entry { - unsigned long used; - const void *key; + unsigned long used; /* Hash code of the key, or 0 for an unused entry. */ + const void *key; /* Key. */ size_t keylen; - void *data; + void *data; /* Value. */ struct hash_entry *next; } hash_entry; -/* Forward declaration of local functions. */ -static void insert_entry_2 (hash_table *htab, - const void *key, size_t keylen, - unsigned long int hval, size_t idx, void *data); -static void resize (hash_table *htab); -static size_t lookup (hash_table *htab, - const void *key, size_t keylen, - unsigned long int hval); -static unsigned long compute_hashval (const void *key, size_t keylen); -static int is_prime (unsigned long int candidate); + +/* Given an odd CANDIDATE > 1, return true if it is a prime number. */ +static int +is_prime (unsigned long int candidate) +{ + /* No even number and none less than 10 will be passed here. */ + unsigned long int divn = 3; + unsigned long int sq = divn * divn; + + while (sq < candidate && candidate % divn != 0) + { + ++divn; + sq += 4 * divn; + ++divn; + } + + return candidate % divn != 0; +} + + +/* Given SEED > 1, return the smallest odd prime number >= SEED. */ +unsigned long +next_prime (unsigned long int seed) +{ + /* Make it definitely odd. */ + seed |= 1; + + while (!is_prime (seed)) + seed += 2; + + return seed; +} +/* Initialize a hash table. 
INIT_SIZE > 1 is the initial number of available + entries. + Return 0 upon successful completion, -1 upon memory allocation error. */ int -init_hash (hash_table *htab, unsigned long int init_size) +hash_init (hash_table *htab, unsigned long int init_size) { /* We need the size to be a prime. */ init_size = next_prime (init_size); @@ -76,8 +106,10 @@ init_hash (hash_table *htab, unsigned long int init_size) } +/* Delete a hash table's contents. + Return 0 always. */ int -delete_hash (hash_table *htab) +hash_destroy (hash_table *htab) { free (htab->table); obstack_free (&htab->mem_pool, NULL); @@ -85,28 +117,96 @@ delete_hash (hash_table *htab) } -int -insert_entry (hash_table *htab, const void *key, size_t keylen, void *data) +/* Compute a hash code for a key consisting of KEYLEN bytes starting at KEY + in memory. */ +static unsigned long +compute_hashval (const void *key, size_t keylen) { - unsigned long int hval = compute_hashval (key, keylen); + size_t cnt; + unsigned long int hval; + + /* Compute the hash value for the given string. The algorithm + is taken from [Aho,Sethi,Ullman], fixed according to + http://www.haible.de/bruno/hashfunc.html. */ + cnt = 0; + hval = keylen; + while (cnt < keylen) + { + hval = (hval << 9) | (hval >> (sizeof (unsigned long) * CHAR_BIT - 9)); + hval += (unsigned long int) *(((const char *) key) + cnt++); + } + return hval != 0 ? hval : ~((unsigned long) 0); +} + + +/* References: + [Aho,Sethi,Ullman] Compilers: Principles, Techniques and Tools, 1986 + [Knuth] The Art of Computer Programming, part3 (6.4) */ + +/* Look up a given key in the hash table. + Return the index of the entry, if present, or otherwise the index of a free + entry where it could be inserted. 
*/ +static size_t +lookup (hash_table *htab, + const void *key, size_t keylen, + unsigned long int hval) +{ + unsigned long int hash; + size_t idx; hash_entry *table = (hash_entry *) htab->table; - size_t idx = lookup (htab, key, keylen, hval); + + /* First hash function: simply take the modul but prevent zero. */ + hash = 1 + hval % htab->size; + + idx = hash; if (table[idx].used) - /* We don't want to overwrite the old value. */ - return -1; - else { - /* An empty bucket has been found. */ - insert_entry_2 (htab, obstack_copy (&htab->mem_pool, key, keylen), - keylen, hval, idx, data); - if (100 * htab->filled > 75 * htab->size) - /* Table is filled more than 75%. Resize the table. */ - resize (htab); - return 0; + if (table[idx].used == hval && table[idx].keylen == keylen + && memcmp (table[idx].key, key, keylen) == 0) + return idx; + + /* Second hash function as suggested in [Knuth]. */ + hash = 1 + hval % (htab->size - 2); + + do + { + if (idx <= hash) + idx = htab->size + idx - hash; + else + idx -= hash; + + /* If entry is found use it. */ + if (table[idx].used == hval && table[idx].keylen == keylen + && memcmp (table[idx].key, key, keylen) == 0) + return idx; + } + while (table[idx].used); } + return idx; +} + + +/* Look up the value of a key in the given table. + If found, return 0 and set *RESULT to it. Otherwise return -1. */ +int +hash_find_entry (hash_table *htab, const void *key, size_t keylen, + void **result) +{ + hash_entry *table = (hash_entry *) htab->table; + size_t idx = lookup (htab, key, keylen, compute_hashval (key, keylen)); + + if (table[idx].used == 0) + return -1; + + *result = table[idx].data; + return 0; } + +/* Insert the pair (KEY[0..KEYLEN-1], DATA) in the hash table at index IDX. + HVAL is the key's hash code. IDX depends on it. The table entry at index + IDX is known to be unused. 
*/ static void insert_entry_2 (hash_table *htab, const void *key, size_t keylen, @@ -136,6 +236,7 @@ insert_entry_2 (hash_table *htab, } +/* Grow the hash table. */ static void resize (hash_table *htab) { @@ -160,23 +261,71 @@ resize (hash_table *htab) } +/* Try to insert the pair (KEY[0..KEYLEN-1], DATA) in the hash table. + Return 0 if successful, or -1 if there is already an entry with the given + key. */ int -find_entry (hash_table *htab, const void *key, size_t keylen, void **result) +hash_insert_entry (hash_table *htab, + const void *key, size_t keylen, + void *data) { + unsigned long int hval = compute_hashval (key, keylen); hash_entry *table = (hash_entry *) htab->table; - size_t idx = lookup (htab, key, keylen, compute_hashval (key, keylen)); + size_t idx = lookup (htab, key, keylen, hval); - if (table[idx].used == 0) + if (table[idx].used) + /* We don't want to overwrite the old value. */ return -1; + else + { + /* An empty bucket has been found. */ + void *keycopy = obstack_copy (&htab->mem_pool, key, keylen); + insert_entry_2 (htab, keycopy, keylen, hval, idx, data); + if (100 * htab->filled > 75 * htab->size) + /* Table is filled more than 75%. Resize the table. */ + resize (htab); + return 0; + } +} - *result = table[idx].data; - return 0; + +/* Insert the pair (KEY[0..KEYLEN-1], DATA) in the hash table. + Return 0. */ +int +hash_set_value (hash_table *htab, + const void *key, size_t keylen, + void *data) +{ + unsigned long int hval = compute_hashval (key, keylen); + hash_entry *table = (hash_entry *) htab->table; + size_t idx = lookup (htab, key, keylen, hval); + + if (table[idx].used) + { + /* Overwrite the old value. */ + table[idx].data = data; + return 0; + } + else + { + /* An empty bucket has been found. */ + void *keycopy = obstack_copy (&htab->mem_pool, key, keylen); + insert_entry_2 (htab, keycopy, keylen, hval, idx, data); + if (100 * htab->filled > 75 * htab->size) + /* Table is filled more than 75%. Resize the table. 
*/ + resize (htab); + return 0; + } } +/* Steps *PTR forward to the next used entry in the given hash table. *PTR + should be initially set to NULL. Store information about the next entry + in *KEY, *KEYLEN, *DATA. + Return 0 if an entry was stored, or -1 if there are no more entries. */ int -iterate_table (hash_table *htab, void **ptr, const void **key, size_t *keylen, - void **data) +hash_iterate (hash_table *htab, void **ptr, const void **key, size_t *keylen, + void **data) { if (*ptr == NULL) { @@ -188,7 +337,7 @@ iterate_table (hash_table *htab, void **ptr, const void **key, size_t *keylen, { if (*ptr == htab->first) return -1; - *ptr = (void *) (((hash_entry *) *ptr)->next); + *ptr = (void *) ((hash_entry *) *ptr)->next; } *key = ((hash_entry *) *ptr)->key; @@ -196,98 +345,3 @@ iterate_table (hash_table *htab, void **ptr, const void **key, size_t *keylen, *data = ((hash_entry *) *ptr)->data; return 0; } - - -/* References: - [Aho,Sethi,Ullman] Compilers: Principles, Techniques and Tools, 1986 - [Knuth] The Art of Computer Programming, part3 (6.4) */ - -static size_t -lookup (hash_table *htab, - const void *key, size_t keylen, - unsigned long int hval) -{ - unsigned long int hash; - size_t idx; - hash_entry *table = (hash_entry *) htab->table; - - /* First hash function: simply take the modul but prevent zero. */ - hash = 1 + hval % htab->size; - - idx = hash; - - if (table[idx].used) - { - if (table[idx].used == hval && table[idx].keylen == keylen - && memcmp (table[idx].key, key, keylen) == 0) - return idx; - - /* Second hash function as suggested in [Knuth]. */ - hash = 1 + hval % (htab->size - 2); - - do - { - if (idx <= hash) - idx = htab->size + idx - hash; - else - idx -= hash; - - /* If entry is found use it. 
*/ - if (table[idx].used == hval && table[idx].keylen == keylen - && memcmp (table[idx].key, key, keylen) == 0) - return idx; - } - while (table[idx].used); - } - return idx; -} - - -static unsigned long -compute_hashval (const void *key, size_t keylen) -{ - size_t cnt; - unsigned long int hval; - - /* Compute the hash value for the given string. The algorithm - is taken from [Aho,Sethi,Ullman]. */ - cnt = 0; - hval = keylen; - while (cnt < keylen) - { - hval = (hval << 9) | (hval >> (sizeof (unsigned long) * CHAR_BIT - 9)); - hval += (unsigned long int) *(((const char *) key) + cnt++); - } - return hval != 0 ? hval : ~((unsigned long) 0); -} - - -unsigned long -next_prime (unsigned long int seed) -{ - /* Make it definitely odd. */ - seed |= 1; - - while (!is_prime (seed)) - seed += 2; - - return seed; -} - - -static int -is_prime (unsigned long int candidate) -{ - /* No even number and none less than 10 will be passed here. */ - unsigned long int divn = 3; - unsigned long int sq = divn * divn; - - while (sq < candidate && candidate % divn != 0) - { - ++divn; - sq += 4 * divn; - ++divn; - } - - return candidate % divn != 0; -} diff --git a/gettext-tools/lib/hash.h b/gettext-tools/lib/hash.h index 8a67868..2e6ea52 100644 --- a/gettext-tools/lib/hash.h +++ b/gettext-tools/lib/hash.h @@ -1,4 +1,4 @@ -/* Copyright (C) 1995, 2000-2003 Free Software Foundation, Inc. +/* Copyright (C) 1995, 2000-2003, 2005 Free Software Foundation, Inc. The GNU C Library is free software; you can redistribute it and/or modify it under the terms of the GNU Library General Public License as @@ -18,7 +18,7 @@ #ifndef _HASH_H #define _HASH_H -#include <obstack.h> +#include "obstack.h" #ifdef __cplusplus extern "C" { @@ -26,27 +26,51 @@ extern "C" { typedef struct hash_table { - unsigned long int size; - unsigned long int filled; - void *first; - void *table; - struct obstack mem_pool; + unsigned long int size; /* Number of allocated entries. 
*/ + unsigned long int filled; /* Number of used entries. */ + void *first; /* Pointer to head of list of entries. */ + void *table; /* Pointer to array of entries. */ + struct obstack mem_pool; /* Memory pool holding the keys. */ } hash_table; -extern int init_hash (hash_table *htab, unsigned long int init_size); -extern int delete_hash (hash_table *htab); -extern int insert_entry (hash_table *htab, - const void *key, size_t keylen, - void *data); -extern int find_entry (hash_table *htab, - const void *key, size_t keylen, - void **result); +/* Initialize a hash table. INIT_SIZE > 1 is the initial number of available + entries. + Return 0 upon successful completion, -1 upon memory allocation error. */ +extern int hash_init (hash_table *htab, unsigned long int init_size); -extern int iterate_table (hash_table *htab, void **ptr, - const void **key, size_t *keylen, - void **data); +/* Delete a hash table's contents. + Return 0 always. */ +extern int hash_destroy (hash_table *htab); +/* Look up the value of a key in the given table. + If found, return 0 and set *RESULT to it. Otherwise return -1. */ +extern int hash_find_entry (hash_table *htab, + const void *key, size_t keylen, + void **result); + +/* Try to insert the pair (KEY[0..KEYLEN-1], DATA) in the hash table. + Return 0 if successful, or -1 if there is already an entry with the given + key. */ +extern int hash_insert_entry (hash_table *htab, + const void *key, size_t keylen, + void *data); + +/* Insert the pair (KEY[0..KEYLEN-1], DATA) in the hash table. + Return 0. */ +extern int hash_set_value (hash_table *htab, + const void *key, size_t keylen, + void *data); + +/* Steps *PTR forward to the next used entry in the given hash table. *PTR + should be initially set to NULL. Store information about the next entry + in *KEY, *KEYLEN, *DATA. + Return 0 if an entry was stored, or -1 if there are no more entries. 
*/ +extern int hash_iterate (hash_table *htab, void **ptr, + const void **key, size_t *keylen, + void **data); + +/* Given SEED > 1, return the smallest odd prime number >= SEED. */ extern unsigned long int next_prime (unsigned long int seed); #ifdef __cplusplus |