mirror of
https://github.com/mirror/wget.git
synced 2025-01-21 17:50:30 +08:00
[svn] Improve performance of grow_hash_table.
Published in <sxs66g8nd4c.fsf@florida.arsdigita.de>.
This commit is contained in:
parent
61bb00adc0
commit
ac7c8c1390
@ -1,3 +1,15 @@
|
|||||||
|
2001-04-14 Hrvoje Niksic <hniksic@arsdigita.com>
|
||||||
|
|
||||||
|
* url.c: Don't declare `construct'.
|
||||||
|
|
||||||
|
* hash.c (grow_hash_table): Speed up rehashing; inline storing of
|
||||||
|
mappings to new locations.
|
||||||
|
(hash_table_new): Make resize_threshold a field in the hash table,
|
||||||
|
so we don't have to recalculate it in each hash_table_put.
|
||||||
|
(grow_hash_table): Update resize_threshold.
|
||||||
|
(MAX): Remove unused macro.
|
||||||
|
(prime_size): Made static.
|
||||||
|
|
||||||
2001-04-14 Hrvoje Niksic <hniksic@arsdigita.com>
|
2001-04-14 Hrvoje Niksic <hniksic@arsdigita.com>
|
||||||
|
|
||||||
* retr.c (retrieve_url): Call uri_merge, not url_concat.
|
* retr.c (retrieve_url): Call uri_merge, not url_concat.
|
||||||
|
82
src/hash.c
82
src/hash.c
@ -67,9 +67,9 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
|
|||||||
|
|
||||||
The hashing and equality functions are normally provided by the
|
The hashing and equality functions are normally provided by the
|
||||||
user. For the special (and frequent) case of hashing strings, you
|
user. For the special (and frequent) case of hashing strings, you
|
||||||
can use the pre-canned make_string_hash_table(), which provides the
|
can use the pre-canned make_string_hash_table(), which provides an
|
||||||
string hashing function from the Dragon Book, and a string equality
|
efficient string hashing function, and a string equality wrapper
|
||||||
wrapper around strcmp().
|
around strcmp().
|
||||||
|
|
||||||
When specifying your own hash and test functions, make sure the
|
When specifying your own hash and test functions, make sure the
|
||||||
following holds true:
|
following holds true:
|
||||||
@ -143,6 +143,9 @@ struct hash_table {
|
|||||||
int count; /* number of non-empty, non-deleted
|
int count; /* number of non-empty, non-deleted
|
||||||
fields. */
|
fields. */
|
||||||
|
|
||||||
|
int resize_threshold; /* after size exceeds this number of
|
||||||
|
entries, resize the table. */
|
||||||
|
|
||||||
struct mapping *mappings;
|
struct mapping *mappings;
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -157,7 +160,7 @@ struct hash_table {
|
|||||||
|
|
||||||
/* Find a prime near, but greater than or equal to SIZE. */
|
/* Find a prime near, but greater than or equal to SIZE. */
|
||||||
|
|
||||||
int
|
static int
|
||||||
prime_size (int size)
|
prime_size (int size)
|
||||||
{
|
{
|
||||||
static const unsigned long primes [] = {
|
static const unsigned long primes [] = {
|
||||||
@ -180,9 +183,12 @@ prime_size (int size)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* Create a hash table of INITIAL_SIZE with hash function
|
/* Create a hash table of INITIAL_SIZE with hash function
|
||||||
HASH_FUNCTION and test function TEST_FUNCTION. If you wish to
|
HASH_FUNCTION and test function TEST_FUNCTION. INITIAL_SIZE will
|
||||||
start out with a "small" table which will be regrown as needed,
|
be rounded to the next prime, so you don't have to worry about it
|
||||||
specify 0 as INITIAL_SIZE. */
|
being a prime number.
|
||||||
|
|
||||||
|
Consequently, if you wish to start out with a "small" table which
|
||||||
|
will be regrown as needed, specify INITIAL_SIZE 0. */
|
||||||
|
|
||||||
struct hash_table *
|
struct hash_table *
|
||||||
hash_table_new (int initial_size,
|
hash_table_new (int initial_size,
|
||||||
@ -191,12 +197,18 @@ hash_table_new (int initial_size,
|
|||||||
{
|
{
|
||||||
struct hash_table *ht
|
struct hash_table *ht
|
||||||
= (struct hash_table *)xmalloc (sizeof (struct hash_table));
|
= (struct hash_table *)xmalloc (sizeof (struct hash_table));
|
||||||
|
|
||||||
ht->hash_function = hash_function;
|
ht->hash_function = hash_function;
|
||||||
ht->test_function = test_function;
|
ht->test_function = test_function;
|
||||||
|
|
||||||
ht->size = prime_size (initial_size);
|
ht->size = prime_size (initial_size);
|
||||||
|
ht->resize_threshold = ht->size * 3 / 4;
|
||||||
|
|
||||||
ht->count = 0;
|
ht->count = 0;
|
||||||
|
|
||||||
ht->mappings = xmalloc (ht->size * sizeof (struct mapping));
|
ht->mappings = xmalloc (ht->size * sizeof (struct mapping));
|
||||||
memset (ht->mappings, '\0', ht->size * sizeof (struct mapping));
|
memset (ht->mappings, '\0', ht->size * sizeof (struct mapping));
|
||||||
|
|
||||||
return ht;
|
return ht;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -210,8 +222,8 @@ hash_table_destroy (struct hash_table *ht)
|
|||||||
}
|
}
|
||||||
|
|
||||||
/* The heart of almost all functions in this file -- find the mapping
|
/* The heart of almost all functions in this file -- find the mapping
|
||||||
whose KEY is equal to key, using a linear probing loop. Returns
|
whose KEY is equal to key, using linear probing. Returns the
|
||||||
the offset of the mapping in ht->mappings. */
|
mapping that matches KEY, or NULL if none matches. */
|
||||||
|
|
||||||
static inline struct mapping *
|
static inline struct mapping *
|
||||||
find_mapping (struct hash_table *ht, const void *key)
|
find_mapping (struct hash_table *ht, const void *key)
|
||||||
@ -273,8 +285,6 @@ hash_table_exists (struct hash_table *ht, const void *key)
|
|||||||
return find_mapping (ht, key) != NULL;
|
return find_mapping (ht, key) != NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
#define MAX(i, j) (((i) >= (j)) ? (i) : (j))
|
|
||||||
|
|
||||||
/* Grow hash table HT as necessary, and rehash all the key-value
|
/* Grow hash table HT as necessary, and rehash all the key-value
|
||||||
mappings. */
|
mappings. */
|
||||||
|
|
||||||
@ -283,26 +293,33 @@ grow_hash_table (struct hash_table *ht)
|
|||||||
{
|
{
|
||||||
struct mapping *old_mappings = ht->mappings;
|
struct mapping *old_mappings = ht->mappings;
|
||||||
struct mapping *old_end = ht->mappings + ht->size;
|
struct mapping *old_end = ht->mappings + ht->size;
|
||||||
struct mapping *mp;
|
struct mapping *mp, *mappings;
|
||||||
int old_count = ht->count; /* for assert() below */
|
int newsize;
|
||||||
|
|
||||||
|
newsize = prime_size (ht->size * 2);
|
||||||
#if 0
|
#if 0
|
||||||
printf ("growing from %d to %d\n", ht->size, prime_size (ht->size * 2));
|
printf ("growing from %d to %d\n", ht->size, newsize);
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
ht->size = prime_size (ht->size * 2);
|
ht->size = newsize;
|
||||||
|
ht->resize_threshold = newsize * 3 / 4;
|
||||||
|
|
||||||
ht->mappings = xmalloc (ht->size * sizeof (struct mapping));
|
mappings = xmalloc (ht->size * sizeof (struct mapping));
|
||||||
memset (ht->mappings, '\0', ht->size * sizeof (struct mapping));
|
memset (mappings, '\0', ht->size * sizeof (struct mapping));
|
||||||
|
ht->mappings = mappings;
|
||||||
/* Need to reset this; hash_table_put will reinitialize it. */
|
|
||||||
ht->count = 0;
|
|
||||||
|
|
||||||
for (mp = old_mappings; mp < old_end; mp++)
|
for (mp = old_mappings; mp < old_end; mp++)
|
||||||
if (!EMPTY_MAPPING_P (mp))
|
if (!EMPTY_MAPPING_P (mp))
|
||||||
hash_table_put (ht, mp->key, mp->value);
|
{
|
||||||
|
struct mapping *new_mp = mappings + HASH_POSITION (ht, mp->key);
|
||||||
|
/* We don't need to call test function and worry about
|
||||||
|
collisions because all the keys come from the hash table
|
||||||
|
and are therefore guaranteed to be unique. */
|
||||||
|
LOOP_NON_EMPTY (new_mp, mappings, newsize)
|
||||||
|
;
|
||||||
|
*new_mp = *mp;
|
||||||
|
}
|
||||||
|
|
||||||
assert (ht->count == old_count);
|
|
||||||
xfree (old_mappings);
|
xfree (old_mappings);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -330,7 +347,7 @@ hash_table_put (struct hash_table *ht, const void *key, void *value)
|
|||||||
mp->key = (void *)key; /* const? */
|
mp->key = (void *)key; /* const? */
|
||||||
mp->value = value;
|
mp->value = value;
|
||||||
|
|
||||||
if (ht->count > ht->size * 3 / 4)
|
if (ht->count > ht->resize_threshold)
|
||||||
/* When table is 75% full, regrow it. */
|
/* When table is 75% full, regrow it. */
|
||||||
grow_hash_table (ht);
|
grow_hash_table (ht);
|
||||||
}
|
}
|
||||||
@ -353,9 +370,9 @@ hash_table_remove (struct hash_table *ht, const void *key)
|
|||||||
--ht->count;
|
--ht->count;
|
||||||
|
|
||||||
/* Rehash all the entries following MP. The alternative
|
/* Rehash all the entries following MP. The alternative
|
||||||
approach is to mark entry as deleted, but that leaves a lot
|
approach is to mark the entry as deleted, i.e. create a
|
||||||
of garbage. More importantly, this method makes
|
"tombstone". That makes remove faster, but leaves a lot of
|
||||||
hash_table_get and hash_table_put measurably faster. */
|
garbage and slows down hash_table_get and hash_table_put. */
|
||||||
|
|
||||||
mp = NEXT_MAPPING (mp, mappings, size);
|
mp = NEXT_MAPPING (mp, mappings, size);
|
||||||
LOOP_NON_EMPTY (mp, mappings, size)
|
LOOP_NON_EMPTY (mp, mappings, size)
|
||||||
@ -389,7 +406,7 @@ void
|
|||||||
hash_table_clear (struct hash_table *ht)
|
hash_table_clear (struct hash_table *ht)
|
||||||
{
|
{
|
||||||
memset (ht->mappings, '\0', ht->size * sizeof (struct mapping));
|
memset (ht->mappings, '\0', ht->size * sizeof (struct mapping));
|
||||||
ht->count = 0;
|
ht->count = 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Map MAPFUN over all the mappings in hash table HT. MAPFUN is
|
/* Map MAPFUN over all the mappings in hash table HT. MAPFUN is
|
||||||
@ -416,6 +433,8 @@ hash_table_map (struct hash_table *ht,
|
|||||||
key = mp->key;
|
key = mp->key;
|
||||||
if (mapfun (key, mp->value, closure))
|
if (mapfun (key, mp->value, closure))
|
||||||
return;
|
return;
|
||||||
|
/* hash_table_remove might have moved the adjacent
|
||||||
|
mappings. */
|
||||||
if (mp->key != key && !EMPTY_MAPPING_P (mp))
|
if (mp->key != key && !EMPTY_MAPPING_P (mp))
|
||||||
goto repeat;
|
goto repeat;
|
||||||
}
|
}
|
||||||
@ -424,6 +443,7 @@ hash_table_map (struct hash_table *ht,
|
|||||||
/* Return the number of elements in the hash table. This is not the
|
/* Return the number of elements in the hash table. This is not the
|
||||||
same as the physical size of the hash table, which is always
|
same as the physical size of the hash table, which is always
|
||||||
greater than the number of elements. */
|
greater than the number of elements. */
|
||||||
|
|
||||||
int
|
int
|
||||||
hash_table_count (struct hash_table *ht)
|
hash_table_count (struct hash_table *ht)
|
||||||
{
|
{
|
||||||
@ -432,8 +452,12 @@ hash_table_count (struct hash_table *ht)
|
|||||||
|
|
||||||
/* Support for hash tables whose keys are strings. */
|
/* Support for hash tables whose keys are strings. */
|
||||||
|
|
||||||
/* 31 bit hash function. Taken from Gnome's glib. This seems to
|
/* 31 bit hash function. Taken from Gnome's glib, modified to use
|
||||||
perform much better than the above. */
|
standard C types.
|
||||||
|
|
||||||
|
We used to use the popular hash function from the Dragon Book, but
|
||||||
|
this one seems to perform much better. */
|
||||||
|
|
||||||
unsigned long
|
unsigned long
|
||||||
string_hash (const void *key)
|
string_hash (const void *key)
|
||||||
{
|
{
|
||||||
|
@ -131,7 +131,6 @@ static struct proto sup_protos[] =
|
|||||||
|
|
||||||
static void parse_dir PARAMS ((const char *, char **, char **));
|
static void parse_dir PARAMS ((const char *, char **, char **));
|
||||||
static uerr_t parse_uname PARAMS ((const char *, char **, char **));
|
static uerr_t parse_uname PARAMS ((const char *, char **, char **));
|
||||||
static char *construct PARAMS ((const char *, const char *, int , int));
|
|
||||||
static char *construct_relative PARAMS ((const char *, const char *));
|
static char *construct_relative PARAMS ((const char *, const char *));
|
||||||
static char process_ftp_type PARAMS ((char *));
|
static char process_ftp_type PARAMS ((char *));
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user