[svn] Improve documentation of cookie code.

This commit is contained in:
hniksic 2003-10-06 17:47:08 -07:00
parent d1211bddb1
commit 6359e38d96
5 changed files with 121 additions and 92 deletions

View File

@ -1,3 +1,16 @@
2003-10-07 Hrvoje Niksic <hniksic@xemacs.org>
* cmpt.c (memmove): Comment out, since it's no longer used.
* cookies.c (cookie_jar_generate_cookie_header): Allocate room for
chains in one pass.
(find_chains_of_host): Assume that the caller has allocated DEST
to be sufficiently large to take all the data.
(eliminate_dups): Run through the array and eliminate dups on the
fly instead of using memmove.
(cookie_jar_process_set_cookie): Free cookie->domain before
re-setting it.
2003-10-05 Gisle Vanem <giva@bgnett.no>
* mswindows.c (set_sleep_mode): Fix type of

View File

@ -1446,6 +1446,10 @@ usleep (unsigned long usec)
#endif /* not HAVE_USLEEP */
/* Currently unused in Wget. Uncomment if we start using memmove
again. */
#if 0
#ifndef HAVE_MEMMOVE
void *
memmove (char *dest, const char *source, unsigned length)
@ -1464,3 +1468,5 @@ memmove (char *dest, const char *source, unsigned length)
return (void *) d0;
}
#endif /* not HAVE_MEMMOVE */
#endif /* 0 */

View File

@ -30,11 +30,17 @@ so, delete this exception statement from your version. */
/* Written by Hrvoje Niksic. Parts are loosely inspired by cookie /* Written by Hrvoje Niksic. Parts are loosely inspired by cookie
code submitted by Tomasz Wegrzanowski. code submitted by Tomasz Wegrzanowski.
TODO: Implement limits on cookie-related sizes, such as max. cookie Ideas for future work:
size, max. number of cookies, etc. Add more "cookie jar" methods,
such as methods to over stored cookies, to clear temporary cookies, * Implement limits on cookie-related sizes, such as max. cookie
to perform intelligent auto-saving, etc. Ultimately support size, max. number of cookies, etc.
`Set-Cookie2' and `Cookie2' headers. */
* Add more "cookie jar" methods, such as methods to iterate over
stored cookies, to clear temporary cookies, to perform
intelligent auto-saving, etc.
* Support `Set-Cookie2' and `Cookie2' headers? Does anyone really
use them? */
#include <config.h> #include <config.h>
@ -58,10 +64,20 @@ time_t http_atotm PARAMS ((const char *));
/* Declarations of `struct cookie' and the most basic functions. */ /* Declarations of `struct cookie' and the most basic functions. */
/* Cookie jar serves as cookie storage and a means of retrieving
cookies efficiently. All cookies with the same domain are stored
in a linked list called "chain". A cookie chain can be reached by
looking up the domain in the cookie jar's chains_by_domain table.
For example, to reach all the cookies under google.com, one must
execute hash_table_get(jar->chains_by_domain, "google.com"). Of
course, when sending a cookie to `www.google.com', one must search
for cookies that belong to either `www.google.com' or `google.com'
-- but the point is that the code doesn't need to go through *all*
the cookies. */
struct cookie_jar { struct cookie_jar {
/* Hash table that maps domain names to cookie chains. A "cookie /* Mapping between domains and their corresponding cookies. */
chain" is a linked list of cookies that belong to the same
domain. */
struct hash_table *chains_by_domain; struct hash_table *chains_by_domain;
int cookie_count; /* number of cookies in the jar. */ int cookie_count; /* number of cookies in the jar. */
@ -227,10 +243,11 @@ store_cookie (struct cookie_jar *jar, struct cookie *cookie)
} }
else else
{ {
/* We are now creating the chain. Allocate the string that will /* We are now creating the chain. Use a copy of cookie->domain
be used as a key. It is unsafe to use cookie->domain for as the key for the life-time of the chain. Using
that, because it might get deallocated by the above code at cookie->domain would be unsafe because the life-time of the
some point later. */ chain may exceed the life-time of the cookie. (Cookies may
be deleted from the chain by this very function.) */
cookie->next = NULL; cookie->next = NULL;
chain_key = xstrdup (cookie->domain); chain_key = xstrdup (cookie->domain);
} }
@ -298,7 +315,6 @@ discard_matching_cookie (struct cookie_jar *jar, struct cookie *cookie)
/* Functions for parsing the `Set-Cookie' header, and creating new /* Functions for parsing the `Set-Cookie' header, and creating new
cookies from the wire. */ cookies from the wire. */
#define NAME_IS(string_literal) \ #define NAME_IS(string_literal) \
BOUNDED_EQUAL_NO_CASE (name_b, name_e, string_literal) BOUNDED_EQUAL_NO_CASE (name_b, name_e, string_literal)
@ -772,7 +788,8 @@ check_domain_match (const char *cookie_domain, const char *host)
DEBUGP ((" 7")); DEBUGP ((" 7"));
/* Don't allow domain "bar.com" to match host "foobar.com". */ /* Don't allow the host "foobar.com" to set a cookie for domain
"bar.com". */
if (*cookie_domain != '.') if (*cookie_domain != '.')
{ {
int dlen = strlen (cookie_domain); int dlen = strlen (cookie_domain);
@ -830,9 +847,11 @@ cookie_jar_process_set_cookie (struct cookie_jar *jar,
logprintf (LOG_NOTQUIET, logprintf (LOG_NOTQUIET,
"Cookie coming from %s attempted to set domain to %s\n", "Cookie coming from %s attempted to set domain to %s\n",
host, cookie->domain); host, cookie->domain);
xfree (cookie->domain);
goto copy_domain; goto copy_domain;
} }
} }
if (!cookie->path) if (!cookie->path)
cookie->path = xstrdup (path); cookie->path = xstrdup (path);
else else
@ -863,23 +882,26 @@ cookie_jar_process_set_cookie (struct cookie_jar *jar,
previously stored cookies. Entry point is previously stored cookies. Entry point is
`build_cookies_request'. */ `build_cookies_request'. */
/* Find the cookie chains that match HOST and store them to DEST. /* Find the cookie chains whose domains match HOST and store them to
DEST.
A cookie chain is the list of cookies declared under a domain. A cookie chain is the head of a list of cookies that belong to a
Given HOST "img.search.xemacs.org", this function will store the host/domain. Given HOST "img.search.xemacs.org", this function
chains for "img.search.xemacs.org", "search.xemacs.org", and will return the chains for "img.search.xemacs.org",
"xemacs.org" -- those of them that exist (if any), that is. "search.xemacs.org", and "xemacs.org" -- those of them that exist
(if any), that is.
No more than SIZE matches are written; if more matches are present, DEST should be large enough to accept (in the worst case) as many
return the number of chains that would have been written. */ elements as there are domain components of HOST. */
static int static int
find_matching_chains (struct cookie_jar *jar, const char *host, find_chains_of_host (struct cookie_jar *jar, const char *host,
struct cookie *dest[], int dest_size) struct cookie *dest[])
{ {
int dest_count = 0; int dest_count = 0;
int passes, passcnt; int passes, passcnt;
/* Bail out quickly if there are no cookies in the jar. */
if (!hash_table_count (jar->chains_by_domain)) if (!hash_table_count (jar->chains_by_domain))
return 0; return 0;
@ -902,11 +924,7 @@ find_matching_chains (struct cookie_jar *jar, const char *host,
{ {
struct cookie *chain = hash_table_get (jar->chains_by_domain, host); struct cookie *chain = hash_table_get (jar->chains_by_domain, host);
if (chain) if (chain)
{ dest[dest_count++] = chain;
if (dest_count < dest_size)
dest[dest_count] = chain;
++dest_count;
}
if (++passcnt >= passes) if (++passcnt >= passes)
break; break;
host = strchr (host, '.') + 1; host = strchr (host, '.') + 1;
@ -925,8 +943,8 @@ path_matches (const char *full_path, const char *prefix)
if (*prefix != '/') if (*prefix != '/')
/* Wget's HTTP paths do not begin with '/' (the URL code treats it /* Wget's HTTP paths do not begin with '/' (the URL code treats it
as a separator), but the '/' is assumed when matching against as a mere separator, inspired by rfc1808), but the '/' is
the cookie stuff. */ assumed when matching against the cookie stuff. */
return 0; return 0;
++prefix; ++prefix;
@ -940,17 +958,17 @@ path_matches (const char *full_path, const char *prefix)
return len + 1; return len + 1;
} }
/* Return non-zero iff COOKIE matches the given HOST, PORT, PATH, and /* Return non-zero iff COOKIE matches the provided parameters of the
SECFLAG. URL being downloaded: HOST, PORT, PATH, and SECFLAG.
If PATH_GOODNESS is non-NULL, store the "path goodness" value If PATH_GOODNESS is non-NULL, store the "path goodness" value
there. That value is a measure of how well COOKIE matches PATH, there. That value is a measure of how closely COOKIE matches PATH,
used for ordering cookies. */ used for ordering cookies. */
static int static int
matching_cookie (const struct cookie *cookie, cookie_matches_url (const struct cookie *cookie,
const char *host, int port, const char *path, const char *host, int port, const char *path,
int secure, int *path_goodness) int secflag, int *path_goodness)
{ {
int pg; int pg;
@ -962,7 +980,7 @@ matching_cookie (const struct cookie *cookie,
possible. */ possible. */
return 0; return 0;
if (cookie->secure && !secure) if (cookie->secure && !secflag)
/* Don't transmit secure cookies over insecure connections. */ /* Don't transmit secure cookies over insecure connections. */
return 0; return 0;
if (cookie->port != PORT_ANY && cookie->port != port) if (cookie->port != PORT_ANY && cookie->port != port)
@ -970,7 +988,7 @@ matching_cookie (const struct cookie *cookie,
/* If exact domain match is required, verify that cookie's domain is /* If exact domain match is required, verify that cookie's domain is
equal to HOST. If not, assume success on the grounds of the equal to HOST. If not, assume success on the grounds of the
cookie's chain having been found by find_matching_chains. */ cookie's chain having been found by find_chains_of_host. */
if (cookie->domain_exact if (cookie->domain_exact
&& 0 != strcasecmp (host, cookie->domain)) && 0 != strcasecmp (host, cookie->domain))
return 0; return 0;
@ -1015,40 +1033,45 @@ equality_comparator (const void *p1, const void *p2)
} }
/* Eliminate duplicate cookies. "Duplicate cookies" are any two /* Eliminate duplicate cookies. "Duplicate cookies" are any two
cookies whose name and value are the same. Whenever a duplicate cookies with the same attr name and value. Whenever a duplicate
pair is found, one of the cookies is removed. */ pair is found, one of the cookies is removed. */
static int static int
eliminate_dups (struct weighed_cookie *outgoing, int count) eliminate_dups (struct weighed_cookie *outgoing, int count)
{ {
int i; struct weighed_cookie *h; /* hare */
struct weighed_cookie *t; /* tortoise */
struct weighed_cookie *end = outgoing + count;
/* We deploy a simple uniquify algorithm: first sort the array /* We deploy a simple uniquify algorithm: first sort the array
according to our sort criteria, then uniquify it by comparing according to our sort criteria, then copy it to itself, comparing
each cookie with its neighbor. */ each cookie to its neighbor and ignoring the duplicates. */
qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator); qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator);
for (i = 0; i < count - 1; i++) /* "Hare" runs through all the entries in the array, followed by
"tortoise". If a duplicate is found, the hare skips it.
Non-duplicate entries are copied to the tortoise ptr. */
for (h = t = outgoing; h < end; h++)
{ {
struct cookie *c1 = outgoing[i].cookie; if (h != end - 1)
struct cookie *c2 = outgoing[i + 1].cookie;
if (!strcmp (c1->attr, c2->attr) && !strcmp (c1->value, c2->value))
{ {
/* c1 and c2 are the same; get rid of c2. */ struct cookie *c0 = h[0].cookie;
if (count > i + 1) struct cookie *c1 = h[1].cookie;
/* move all ptrs from positions [i + 1, count) to i. */ if (!strcmp (c0->attr, c1->attr) && !strcmp (c0->value, c1->value))
memmove (outgoing + i, outgoing + i + 1, continue; /* ignore the duplicate */
(count - (i + 1)) * sizeof (struct weighed_cookie));
/* We decrement i to counter the ++i above. Remember that
we've just removed the element in front of us; we need to
remain in place to check whether outgoing[i] matches what
used to be outgoing[i + 2]. */
--i;
--count;
} }
/* If the hare has advanced past the tortoise (because of
previous dups), make sure the values get copied. Otherwise,
no copying is necessary. */
if (h != t)
*t++ = *h;
else
t++;
} }
return count; return t - outgoing;
} }
/* Comparator used for sorting by quality. */ /* Comparator used for sorting by quality. */
@ -1081,9 +1104,7 @@ cookie_jar_generate_cookie_header (struct cookie_jar *jar, const char *host,
int port, const char *path, int port, const char *path,
int connection_secure_p) int connection_secure_p)
{ {
struct cookie *chain_default_store[5]; struct cookie **chains;
struct cookie **chains = chain_default_store;
int chain_store_size = countof (chain_default_store);
int chain_count; int chain_count;
struct cookie *cookie; struct cookie *cookie;
@ -1092,19 +1113,15 @@ cookie_jar_generate_cookie_header (struct cookie_jar *jar, const char *host,
char *result; char *result;
int result_size, pos; int result_size, pos;
/* First, find the chains that match HOST. */ /* First, find the cookie chains whose domains match HOST. */
again:
chain_count = find_matching_chains (jar, host, chains, chain_store_size);
if (chain_count > chain_store_size)
{
/* It's unlikely that more than 5 chains will ever match. But
since find_matching_chains reports the exact size it needs,
it's easy to not have the limitation, so we don't. */
chains = alloca (chain_count * sizeof (struct cookie *));
chain_store_size = chain_count;
goto again;
}
/* Allocate room for find_chains_of_host to write to. The number of
chains can at most equal the number of subdomains, hence
1+<number of dots>. */
chains = alloca_array (struct cookie *, 1 + count_char (host, '.'));
chain_count = find_chains_of_host (jar, host, chains);
/* No cookies for this host. */
if (!chain_count) if (!chain_count)
return NULL; return NULL;
@ -1118,13 +1135,14 @@ cookie_jar_generate_cookie_header (struct cookie_jar *jar, const char *host,
count = 0; count = 0;
for (i = 0; i < chain_count; i++) for (i = 0; i < chain_count; i++)
for (cookie = chains[i]; cookie; cookie = cookie->next) for (cookie = chains[i]; cookie; cookie = cookie->next)
if (matching_cookie (cookie, host, port, path, connection_secure_p, NULL)) if (cookie_matches_url (cookie, host, port, path, connection_secure_p,
NULL))
++count; ++count;
if (!count) if (!count)
return NULL; /* no cookies matched */ return NULL; /* no cookies matched */
/* Allocate the array. */ /* Allocate the array. */
outgoing = alloca (count * sizeof (struct weighed_cookie)); outgoing = alloca_array (struct weighed_cookie, count);
/* Fill the array with all the matching cookies from the chains that /* Fill the array with all the matching cookies from the chains that
match HOST. */ match HOST. */
@ -1133,7 +1151,7 @@ cookie_jar_generate_cookie_header (struct cookie_jar *jar, const char *host,
for (cookie = chains[i]; cookie; cookie = cookie->next) for (cookie = chains[i]; cookie; cookie = cookie->next)
{ {
int pg; int pg;
if (!matching_cookie (cookie, host, port, path, if (!cookie_matches_url (cookie, host, port, path,
connection_secure_p, &pg)) connection_secure_p, &pg))
continue; continue;
outgoing[ocnt].cookie = cookie; outgoing[ocnt].cookie = cookie;

View File

@ -803,9 +803,8 @@ Can't timestamp and not clobber old files at the same time.\n"));
if (opt.verbose) if (opt.verbose)
set_progress_implementation (opt.progress_type); set_progress_implementation (opt.progress_type);
/* Allocate basic pointer. */
url = (char **) alloca ((nurl + 1) * sizeof (char *));
/* Fill in the arguments. */ /* Fill in the arguments. */
url = alloca_array (char *, nurl + 1);
for (i = 0; i < nurl; i++, optind++) for (i = 0; i < nurl; i++, optind++)
{ {
char *rewritten = rewrite_shorthand_url (argv[optind]); char *rewritten = rewrite_shorthand_url (argv[optind]);
@ -928,9 +927,7 @@ Can't timestamp and not clobber old files at the same time.\n"));
cookie_jar_save (wget_cookie_jar, opt.cookies_output); cookie_jar_save (wget_cookie_jar, opt.cookies_output);
if (opt.convert_links && !opt.delete_after) if (opt.convert_links && !opt.delete_after)
{
convert_all_links (); convert_all_links ();
}
log_close (); log_close ();
for (i = 0; i < nurl; i++) for (i = 0; i < nurl; i++)

View File

@ -185,18 +185,13 @@ char *xstrdup_debug PARAMS ((const char *, const char *, int));
int a[5] = {1, 2}; -- countof(a) == 5 int a[5] = {1, 2}; -- countof(a) == 5
char *a[3] = { -- countof(a) == 3 char *a[] = { -- countof(a) == 3
"foo", "bar", "baz" "foo", "bar", "baz"
}; }; */
And, most importantly, it works when the compiler counts the array
elements for you:
char *a[] = { -- countof(a) == 4
"foo", "bar", "baz", "qux"
} */
#define countof(array) (sizeof (array) / sizeof (*(array))) #define countof(array) (sizeof (array) / sizeof (*(array)))
#define alloca_array(type, size) ((type *) alloca ((size) * sizeof (type)))
/* Copy the data delimited with BEG and END to alloca-allocated /* Copy the data delimited with BEG and END to alloca-allocated
storage, and zero-terminate it. Arguments are evaluated only once, storage, and zero-terminate it. Arguments are evaluated only once,
in the order BEG, END, PLACE. */ in the order BEG, END, PLACE. */
@ -232,7 +227,7 @@ char *xstrdup_debug PARAMS ((const char *, const char *, int));
#define STRDUP_ALLOCA(ptr, str) do { \ #define STRDUP_ALLOCA(ptr, str) do { \
(ptr) = (char *)alloca (strlen (str) + 1); \ (ptr) = (char *)alloca (strlen (str) + 1); \
strcpy (ptr, str); \ strcpy ((ptr), (str)); \
} while (0) } while (0)
/* Generally useful if you want to avoid arbitrary size limits but /* Generally useful if you want to avoid arbitrary size limits but