[svn] Improve performance of grow_hash_table.

Published in <sxs66g8nd4c.fsf@florida.arsdigita.de>.
This commit is contained in:
hniksic 2001-04-14 00:41:29 -07:00
parent 61bb00adc0
commit ac7c8c1390
3 changed files with 65 additions and 30 deletions

View File

@ -1,3 +1,15 @@
2001-04-14 Hrvoje Niksic <hniksic@arsdigita.com>
* url.c: Don't declare `construct'.
* hash.c (grow_hash_table): Speed up rehashing; inline storing of
mappings to new locations.
(hash_table_new): Make resize_threshold a field in the hash table,
so we don't have to recalculate it in each hash_table_put.
(grow_hash_table): Update resize_threshold.
(MAX): Remove unused macro.
(prime_size): Made static.
2001-04-14 Hrvoje Niksic <hniksic@arsdigita.com> 2001-04-14 Hrvoje Niksic <hniksic@arsdigita.com>
* retr.c (retrieve_url): Call uri_merge, not url_concat. * retr.c (retrieve_url): Call uri_merge, not url_concat.

View File

@ -67,9 +67,9 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
The hashing and equality functions are normally provided by the The hashing and equality functions are normally provided by the
user. For the special (and frequent) case of hashing strings, you user. For the special (and frequent) case of hashing strings, you
can use the pre-canned make_string_hash_table(), which provides the can use the pre-canned make_string_hash_table(), which provides an
string hashing function from the Dragon Book, and a string equality efficient string hashing function, and a string equality wrapper
wrapper around strcmp(). around strcmp().
When specifying your own hash and test functions, make sure the When specifying your own hash and test functions, make sure the
following holds true: following holds true:
@ -143,6 +143,9 @@ struct hash_table {
int count; /* number of non-empty, non-deleted int count; /* number of non-empty, non-deleted
fields. */ fields. */
int resize_threshold; /* after size exceeds this number of
entries, resize the table. */
struct mapping *mappings; struct mapping *mappings;
}; };
@ -157,7 +160,7 @@ struct hash_table {
/* Find a prime near, but greather than or equal to SIZE. */ /* Find a prime near, but greather than or equal to SIZE. */
int static int
prime_size (int size) prime_size (int size)
{ {
static const unsigned long primes [] = { static const unsigned long primes [] = {
@ -180,9 +183,12 @@ prime_size (int size)
} }
/* Create a hash table of INITIAL_SIZE with hash function /* Create a hash table of INITIAL_SIZE with hash function
HASH_FUNCTION and test function TEST_FUNCTION. If you wish to HASH_FUNCTION and test function TEST_FUNCTION. INITIAL_SIZE will
start out with a "small" table which will be regrown as needed, be rounded to the next prime, so you don't have to worry about it
specify 0 as INITIAL_SIZE. */ being a prime number.
Consequently, if you wish to start out with a "small" table which
will be regrown as needed, specify INITIAL_SIZE 0. */
struct hash_table * struct hash_table *
hash_table_new (int initial_size, hash_table_new (int initial_size,
@ -191,12 +197,18 @@ hash_table_new (int initial_size,
{ {
struct hash_table *ht struct hash_table *ht
= (struct hash_table *)xmalloc (sizeof (struct hash_table)); = (struct hash_table *)xmalloc (sizeof (struct hash_table));
ht->hash_function = hash_function; ht->hash_function = hash_function;
ht->test_function = test_function; ht->test_function = test_function;
ht->size = prime_size (initial_size); ht->size = prime_size (initial_size);
ht->resize_threshold = ht->size * 3 / 4;
ht->count = 0; ht->count = 0;
ht->mappings = xmalloc (ht->size * sizeof (struct mapping)); ht->mappings = xmalloc (ht->size * sizeof (struct mapping));
memset (ht->mappings, '\0', ht->size * sizeof (struct mapping)); memset (ht->mappings, '\0', ht->size * sizeof (struct mapping));
return ht; return ht;
} }
@ -210,8 +222,8 @@ hash_table_destroy (struct hash_table *ht)
} }
/* The heart of almost all functions in this file -- find the mapping /* The heart of almost all functions in this file -- find the mapping
whose KEY is equal to key, using a linear probing loop. Returns whose KEY is equal to key, using linear probing. Returns the
the offset of the mapping in ht->mappings. */ mapping that matches KEY, or NULL if none matches. */
static inline struct mapping * static inline struct mapping *
find_mapping (struct hash_table *ht, const void *key) find_mapping (struct hash_table *ht, const void *key)
@ -273,8 +285,6 @@ hash_table_exists (struct hash_table *ht, const void *key)
return find_mapping (ht, key) != NULL; return find_mapping (ht, key) != NULL;
} }
#define MAX(i, j) (((i) >= (j)) ? (i) : (j))
/* Grow hash table HT as necessary, and rehash all the key-value /* Grow hash table HT as necessary, and rehash all the key-value
mappings. */ mappings. */
@ -283,26 +293,33 @@ grow_hash_table (struct hash_table *ht)
{ {
struct mapping *old_mappings = ht->mappings; struct mapping *old_mappings = ht->mappings;
struct mapping *old_end = ht->mappings + ht->size; struct mapping *old_end = ht->mappings + ht->size;
struct mapping *mp; struct mapping *mp, *mappings;
int old_count = ht->count; /* for assert() below */ int newsize;
newsize = prime_size (ht->size * 2);
#if 0 #if 0
printf ("growing from %d to %d\n", ht->size, prime_size (ht->size * 2)); printf ("growing from %d to %d\n", ht->size, newsize);
#endif #endif
ht->size = prime_size (ht->size * 2); ht->size = newsize;
ht->resize_threshold = newsize * 3 / 4;
ht->mappings = xmalloc (ht->size * sizeof (struct mapping)); mappings = xmalloc (ht->size * sizeof (struct mapping));
memset (ht->mappings, '\0', ht->size * sizeof (struct mapping)); memset (mappings, '\0', ht->size * sizeof (struct mapping));
ht->mappings = mappings;
/* Need to reset this; hash_table_put will reinitialize it. */
ht->count = 0;
for (mp = old_mappings; mp < old_end; mp++) for (mp = old_mappings; mp < old_end; mp++)
if (!EMPTY_MAPPING_P (mp)) if (!EMPTY_MAPPING_P (mp))
hash_table_put (ht, mp->key, mp->value); {
struct mapping *new_mp = mappings + HASH_POSITION (ht, mp->key);
/* We don't need to call test function and worry about
collisions because all the keys come from the hash table
and are therefore guaranteed to be unique. */
LOOP_NON_EMPTY (new_mp, mappings, newsize)
;
*new_mp = *mp;
}
assert (ht->count == old_count);
xfree (old_mappings); xfree (old_mappings);
} }
@ -330,7 +347,7 @@ hash_table_put (struct hash_table *ht, const void *key, void *value)
mp->key = (void *)key; /* const? */ mp->key = (void *)key; /* const? */
mp->value = value; mp->value = value;
if (ht->count > ht->size * 3 / 4) if (ht->count > ht->resize_threshold)
/* When table is 75% full, regrow it. */ /* When table is 75% full, regrow it. */
grow_hash_table (ht); grow_hash_table (ht);
} }
@ -353,9 +370,9 @@ hash_table_remove (struct hash_table *ht, const void *key)
--ht->count; --ht->count;
/* Rehash all the entries following MP. The alternative /* Rehash all the entries following MP. The alternative
approach is to mark entry as deleted, but that leaves a lot approach is to mark the entry as deleted, i.e. create a
of garbage. More importantly, this method makes "tombstone". That makes remove faster, but leaves a lot of
hash_table_get and hash_table_put measurably faster. */ garbage and slows down hash_table_get and hash_table_put. */
mp = NEXT_MAPPING (mp, mappings, size); mp = NEXT_MAPPING (mp, mappings, size);
LOOP_NON_EMPTY (mp, mappings, size) LOOP_NON_EMPTY (mp, mappings, size)
@ -389,7 +406,7 @@ void
hash_table_clear (struct hash_table *ht) hash_table_clear (struct hash_table *ht)
{ {
memset (ht->mappings, '\0', ht->size * sizeof (struct mapping)); memset (ht->mappings, '\0', ht->size * sizeof (struct mapping));
ht->count = 0; ht->count = 0;
} }
/* Map MAPFUN over all the mappings in hash table HT. MAPFUN is /* Map MAPFUN over all the mappings in hash table HT. MAPFUN is
@ -416,6 +433,8 @@ hash_table_map (struct hash_table *ht,
key = mp->key; key = mp->key;
if (mapfun (key, mp->value, closure)) if (mapfun (key, mp->value, closure))
return; return;
/* hash_table_remove might have moved the adjacent
mappings. */
if (mp->key != key && !EMPTY_MAPPING_P (mp)) if (mp->key != key && !EMPTY_MAPPING_P (mp))
goto repeat; goto repeat;
} }
@ -424,6 +443,7 @@ hash_table_map (struct hash_table *ht,
/* Return the number of elements in the hash table. This is not the /* Return the number of elements in the hash table. This is not the
same as the physical size of the hash table, which is always same as the physical size of the hash table, which is always
greater than the number of elements. */ greater than the number of elements. */
int int
hash_table_count (struct hash_table *ht) hash_table_count (struct hash_table *ht)
{ {
@ -432,8 +452,12 @@ hash_table_count (struct hash_table *ht)
/* Support for hash tables whose keys are strings. */ /* Support for hash tables whose keys are strings. */
/* 31 bit hash function. Taken from Gnome's glib. This seems to /* 31 bit hash function. Taken from Gnome's glib, modified to use
perform much better than the above. */ standard C types.
We used to use the popular hash function from the Dragon Book, but
this one seems to perform much better. */
unsigned long unsigned long
string_hash (const void *key) string_hash (const void *key)
{ {

View File

@ -131,7 +131,6 @@ static struct proto sup_protos[] =
static void parse_dir PARAMS ((const char *, char **, char **)); static void parse_dir PARAMS ((const char *, char **, char **));
static uerr_t parse_uname PARAMS ((const char *, char **, char **)); static uerr_t parse_uname PARAMS ((const char *, char **, char **));
static char *construct PARAMS ((const char *, const char *, int , int));
static char *construct_relative PARAMS ((const char *, const char *)); static char *construct_relative PARAMS ((const char *, const char *));
static char process_ftp_type PARAMS ((char *)); static char process_ftp_type PARAMS ((char *));