[svn] Fix a bug when loading cookies. Don't ignore domain-flag.

This commit is contained in:
hniksic 2003-09-22 18:08:01 -07:00
parent 625a9d3a9f
commit 45164e05cc
2 changed files with 155 additions and 114 deletions

View File

@ -1,3 +1,18 @@
2003-09-23 Hrvoje Niksic <hniksic@xemacs.org>
* cookies.c (struct cookie): New flag domain_exact.
(update_cookie_field): Skip leading dot in domain.
(find_matching_chains): Match numeric addresses exactly; don't
needlessly copy HOST to the stack.
(matching_cookie): Added argument HOST. If cookie->domain_exact
is set, check that HOST is equal to cookie->domain.
(cookie_jar_load): Only use TAB as delimiter. Document the
meaning of DOMAIN-FLAG. Skip leading dot in domain.
(cookie_jar_load): Don't ignore DOMAIN-FLAG -- instead, set
domain_exact to true if DOMAIN-FLAG is false.
(save_cookies_mapper): If domain_exact is false, prepend the
domain with dot, like Mozilla does.
2003-09-22 Hrvoje Niksic <hniksic@xemacs.org> 2003-09-22 Hrvoje Niksic <hniksic@xemacs.org>
* progress.c (create_image): Print the initial part of the * progress.c (create_image): Print the initial part of the

View File

@ -84,12 +84,17 @@ struct cookie {
char *domain; /* domain of the cookie */ char *domain; /* domain of the cookie */
int port; /* port number */ int port; /* port number */
char *path; /* path prefix of the cookie */ char *path; /* path prefix of the cookie */
int secure; /* whether cookie should be int secure; /* whether cookie should be
transmitted over non-https transmitted over non-https
connections. */ connections. */
int domain_exact; /* whether DOMAIN must match as a
whole. */
int permanent; /* whether the cookie should outlive int permanent; /* whether the cookie should outlive
the session */ the session */
time_t expiry_time; /* time when the cookie expires */ time_t expiry_time; /* time when the cookie expires */
int discard_requested; /* whether cookie was created to int discard_requested; /* whether cookie was created to
request discarding another request discarding another
cookie */ cookie */
@ -335,6 +340,12 @@ update_cookie_field (struct cookie *cookie,
if (!VALUE_NON_EMPTY) if (!VALUE_NON_EMPTY)
return 0; return 0;
FREE_MAYBE (cookie->domain); FREE_MAYBE (cookie->domain);
/* Strictly speaking, we should set cookie->domain_exact if the
domain doesn't begin with a dot. But many sites set the
domain to "foo.com" and expect "subhost.foo.com" to get the
cookie, and it apparently works. */
if (*value_b == '.')
++value_b;
cookie->domain = strdupdelim (value_b, value_e); cookie->domain = strdupdelim (value_b, value_e);
return 1; return 1;
} }
@ -665,7 +676,7 @@ check_domain_match (const char *cookie_domain, const char *host)
DEBUGP ((" 2")); DEBUGP ((" 2"));
/* For the sake of efficiency, check for exact match first. */ /* For the sake of efficiency, check for exact match first. */
if (!strcasecmp (cookie_domain, host)) if (0 == strcasecmp (cookie_domain, host))
return 1; return 1;
DEBUGP ((" 3")); DEBUGP ((" 3"));
@ -852,57 +863,56 @@ cookie_jar_process_set_cookie (struct cookie_jar *jar,
previously stored cookies. Entry point is previously stored cookies. Entry point is
`build_cookies_request'. */ `build_cookies_request'. */
/* Store CHAIN to STORE if there is room in STORE. If not, increment /* Find the cookie chains that match HOST and store them to DEST.
COUNT anyway, so that when the function is done, we end up with the
exact count of how much place we actually need. */
#define STORE_CHAIN(st_chain, st_store, st_size, st_count) do { \ A cookie chain is the list of cookies declared under a domain.
if (st_count < st_size) \ Given HOST "img.search.xemacs.org", this function will store the
store[st_count] = st_chain; \ chains for "img.search.xemacs.org", "search.xemacs.org", and
++st_count; \ "xemacs.org" -- those of them that exist (if any), that is.
} while (0)
/* Store cookie chains that match HOST. Since more than one chain can No more than SIZE matches are written; if more matches are present,
match, the matches are written to STORE. No more than SIZE matches return the number of chains that would have been written. */
are written; if more matches are present, return the number of
chains that would have been written. */
static int static int
find_matching_chains (struct cookie_jar *jar, const char *host, find_matching_chains (struct cookie_jar *jar, const char *host,
struct cookie *store[], int size) struct cookie *dest[], int dest_size)
{ {
struct cookie *chain; int dest_count = 0;
int dot_count; int passes, passcnt;
char *hash_key;
int count = 0;
if (!hash_table_count (jar->chains_by_domain)) if (!hash_table_count (jar->chains_by_domain))
return 0; return 0;
STRDUP_ALLOCA (hash_key, host); if (numeric_address_p (host))
/* If host is an IP address, only check for the exact match. */
passes = 1;
else
/* Otherwise, check all the subdomains except the top-level (last)
one. As a domain with N components has N-1 dots, the number of
passes equals the number of dots. */
passes = count_char (host, '.');
/* Look for an exact match. */ passcnt = 0;
chain = hash_table_get (jar->chains_by_domain, hash_key);
if (chain)
STORE_CHAIN (chain, store, size, count);
dot_count = count_char (host, '.'); /* Find chains that match HOST, starting with exact match and
progressing to less specific domains. For instance, given HOST
/* Match less and less specific domains. For instance, given fly.srk.fer.hr, first look for fly.srk.fer.hr's chain, then
fly.srk.fer.hr, we match .srk.fer.hr, then .fer.hr. */ srk.fer.hr's, then fer.hr's. */
while (dot_count-- > 1) while (1)
{ {
/* Note: we operate directly on hash_key (in form host:port) struct cookie *chain = hash_table_get (jar->chains_by_domain, host);
because we don't want to allocate new hash keys in a
loop. */
char *p = strchr (hash_key, '.');
assert (p != NULL);
chain = hash_table_get (jar->chains_by_domain, p);
if (chain) if (chain)
STORE_CHAIN (chain, store, size, count); {
hash_key = p + 1; if (dest_count < dest_size)
dest[dest_count] = chain;
++dest_count;
}
if (++passcnt >= passes)
break;
host = strchr (host, '.') + 1;
} }
return count;
return dest_count;
} }
/* If FULL_PATH begins with PREFIX, return the length of PREFIX, zero /* If FULL_PATH begins with PREFIX, return the length of PREFIX, zero
@ -930,17 +940,17 @@ path_matches (const char *full_path, const char *prefix)
return len + 1; return len + 1;
} }
/* Return non-zero iff COOKIE matches the given PATH, PORT, and /* Return non-zero iff COOKIE matches the given HOST, PORT, PATH, and
security flag. HOST is not a flag because it is assumed that the SECFLAG.
cookie comes from the correct chain.
If PATH_GOODNESS is non-NULL, store the "path goodness" there. The If PATH_GOODNESS is non-NULL, store the "path goodness" value
said goodness is a measure of how well COOKIE matches PATH. It is there. That value is a measure of how well COOKIE matches PATH,
used for ordering cookies. */ used for ordering cookies. */
static int static int
matching_cookie (const struct cookie *cookie, const char *path, int port, matching_cookie (const struct cookie *cookie,
int connection_secure_p, int *path_goodness) const char *host, int port, const char *path,
int secure, int *path_goodness)
{ {
int pg; int pg;
@ -952,11 +962,19 @@ matching_cookie (const struct cookie *cookie, const char *path, int port,
possible. */ possible. */
return 0; return 0;
if (cookie->secure && !connection_secure_p) if (cookie->secure && !secure)
/* Don't transmit secure cookies over an insecure connection. */ /* Don't transmit secure cookies over insecure connections. */
return 0; return 0;
if (cookie->port != PORT_ANY && cookie->port != port) if (cookie->port != PORT_ANY && cookie->port != port)
return 0; return 0;
/* If exact domain match is required, verify that cookie's domain is
equal to HOST. If it is not required, assume success on the grounds of
the cookie's chain having been found by find_matching_chains. */
if (cookie->domain_exact
&& 0 != strcasecmp (host, cookie->domain))
return 0;
pg = path_matches (path, cookie->path); pg = path_matches (path, cookie->path);
if (!pg) if (!pg)
return 0; return 0;
@ -969,6 +987,11 @@ matching_cookie (const struct cookie *cookie, const char *path, int port,
return 1; return 1;
} }
/* A structure that points to a cookie, along with the additional
information about the cookie's "goodness". This allows us to sort
the cookies when returning them to the server, as required by the
spec. */
struct weighed_cookie { struct weighed_cookie {
struct cookie *cookie; struct cookie *cookie;
int domain_goodness; int domain_goodness;
@ -1001,7 +1024,7 @@ eliminate_dups (struct weighed_cookie *outgoing, int count)
int i; int i;
/* We deploy a simple uniquify algorithm: first sort the array /* We deploy a simple uniquify algorithm: first sort the array
according to our sort criterion, then uniquify it by comparing according to our sort criteria, then uniquify it by comparing
each cookie with its neighbor. */ each cookie with its neighbor. */
qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator); qsort (outgoing, count, sizeof (struct weighed_cookie), equality_comparator);
@ -1059,7 +1082,7 @@ cookie_jar_generate_cookie_header (struct cookie_jar *jar, const char *host,
int connection_secure_p) int connection_secure_p)
{ {
struct cookie *chain_default_store[20]; struct cookie *chain_default_store[20];
struct cookie **all_chains = chain_default_store; struct cookie **chains = chain_default_store;
int chain_store_size = countof (chain_default_store); int chain_store_size = countof (chain_default_store);
int chain_count; int chain_count;
@ -1069,15 +1092,16 @@ cookie_jar_generate_cookie_header (struct cookie_jar *jar, const char *host,
char *result; char *result;
int result_size, pos; int result_size, pos;
/* First, find the chains that match HOST. */
again: again:
chain_count = find_matching_chains (jar, host, all_chains, chain_store_size); chain_count = find_matching_chains (jar, host, chains, chain_store_size);
if (chain_count > chain_store_size) if (chain_count > chain_store_size)
{ {
/* It's extremely unlikely that more than 20 chains will ever /* It's extremely unlikely that more than 20 chains will ever
match. But since find_matching_chains reports the exact size match. But since find_matching_chains reports the exact size
it needs, it's easy to not have the limitation, so we it needs, it's easy to not have the limitation, so we
don't. */ don't. */
all_chains = alloca (chain_count * sizeof (struct cookie *)); chains = alloca (chain_count * sizeof (struct cookie *));
chain_store_size = chain_count; chain_store_size = chain_count;
goto again; goto again;
} }
@ -1087,27 +1111,31 @@ cookie_jar_generate_cookie_header (struct cookie_jar *jar, const char *host,
cookies_now = time (NULL); cookies_now = time (NULL);
/* Count the number of cookies whose path matches. */ /* Now extract from the chains those cookies that match our host
(for domain_exact cookies), port (for cookies with port other
than PORT_ANY), etc. See matching_cookie for details. */
/* Count the number of matching cookies. */
count = 0; count = 0;
for (i = 0; i < chain_count; i++) for (i = 0; i < chain_count; i++)
for (cookie = all_chains[i]; cookie; cookie = cookie->next) for (cookie = chains[i]; cookie; cookie = cookie->next)
if (matching_cookie (cookie, path, port, connection_secure_p, NULL)) if (matching_cookie (cookie, host, port, path, connection_secure_p, NULL))
++count; ++count;
if (!count) if (!count)
/* No matching cookies. */ return NULL; /* no cookies matched */
return NULL;
/* Allocate the array. */ /* Allocate the array. */
outgoing = alloca (count * sizeof (struct weighed_cookie)); outgoing = alloca (count * sizeof (struct weighed_cookie));
/* Fill the array with all the matching cookies from all the /* Fill the array with all the matching cookies from the chains that
matching chains. */ match HOST. */
ocnt = 0; ocnt = 0;
for (i = 0; i < chain_count; i++) for (i = 0; i < chain_count; i++)
for (cookie = all_chains[i]; cookie; cookie = cookie->next) for (cookie = chains[i]; cookie; cookie = cookie->next)
{ {
int pg; int pg;
if (!matching_cookie (cookie, path, port, connection_secure_p, &pg)) if (!matching_cookie (cookie, host, port, path,
connection_secure_p, &pg))
continue; continue;
outgoing[ocnt].cookie = cookie; outgoing[ocnt].cookie = cookie;
outgoing[ocnt].domain_goodness = strlen (cookie->domain); outgoing[ocnt].domain_goodness = strlen (cookie->domain);
@ -1169,8 +1197,9 @@ cookie_jar_generate_cookie_header (struct cookie_jar *jar, const char *host,
} }
/* Support for loading and saving cookies. The format used for /* Support for loading and saving cookies. The format used for
loading and saving roughly matches the format of `cookies.txt' file loading and saving should be the format of the `cookies.txt' file
used by Netscape and Mozilla, at least the Unix versions. The used by Netscape and Mozilla, at least the Unix versions.
(Apparently IE can export cookies in that format as well.) The
format goes like this: format goes like this:
DOMAIN DOMAIN-FLAG PATH SECURE-FLAG TIMESTAMP ATTR-NAME ATTR-VALUE DOMAIN DOMAIN-FLAG PATH SECURE-FLAG TIMESTAMP ATTR-NAME ATTR-VALUE
@ -1183,22 +1212,18 @@ cookie_jar_generate_cookie_header (struct cookie_jar *jar, const char *host,
ATTR-NAME -- name of the cookie attribute ATTR-NAME -- name of the cookie attribute
ATTR-VALUE -- value of the cookie attribute (empty if absent) ATTR-VALUE -- value of the cookie attribute (empty if absent)
The fields are separated by TABs (but Wget's loader recognizes any The fields are separated by TABs. All fields are mandatory, except
whitespace). All fields are mandatory, except for ATTR-VALUE. The for ATTR-VALUE. The `-FLAG' fields are boolean, their legal values
`-FLAG' fields are boolean, their legal values being "TRUE" and being "TRUE" and "FALSE". Empty lines, lines consisting of
"FALSE". Empty lines, lines consisting of whitespace only, and whitespace only, and comment lines (beginning with # optionally
comment lines (beginning with # optionally preceded by whitespace) preceded by whitespace) are ignored.
are ignored.
Example line from cookies.txt (split in two lines for readability): Example line from cookies.txt (split in two lines for readability):
.google.com TRUE / FALSE 2147368447 \ .google.com TRUE / FALSE 2147368447 \
PREF ID=34bb47565bbcd47b:LD=en:NR=20:TM=985172580:LM=985739012 PREF ID=34bb47565bbcd47b:LD=en:NR=20:TM=985172580:LM=985739012
DOMAIN-FLAG is currently not honored by Wget. The cookies whose */
domain begins with `.' are treated as if DOMAIN-FLAG were true,
while all other cookies are treated as if it were FALSE. */
/* If the region [B, E) ends with :<digits>, parse the number, return /* If the region [B, E) ends with :<digits>, parse the number, return
it, and store new boundary (location of the `:') to DOMAIN_E_PTR. it, and store new boundary (location of the `:') to DOMAIN_E_PTR.
@ -1222,20 +1247,14 @@ domain_port (const char *domain_b, const char *domain_e,
return port; return port;
} }
#define SKIP_WS(p) do { \ #define GET_WORD(p, b, e) do { \
while (*p && ISSPACE (*p)) \
++p; \
} while (0)
#define SET_WORD_BOUNDARIES(p, b, e) do { \
SKIP_WS (p); \
b = p; \ b = p; \
/* skip non-ws */ \ while (*p && *p != '\t') \
while (*p && !ISSPACE (*p)) \
++p; \ ++p; \
e = p; \ e = p; \
if (b == e) \ if (b == e || !*p) \
goto next; \ goto next; \
++p; \
} while (0) } while (0)
/* Load cookies from FILE. */ /* Load cookies from FILE. */
@ -1262,60 +1281,65 @@ cookie_jar_load (struct cookie_jar *jar, const char *file)
int port; int port;
char *domain_b = NULL, *domain_e = NULL; char *domain_b = NULL, *domain_e = NULL;
char *ignore_b = NULL, *ignore_e = NULL; char *domflag_b = NULL, *domflag_e = NULL;
char *path_b = NULL, *path_e = NULL; char *path_b = NULL, *path_e = NULL;
char *secure_b = NULL, *secure_e = NULL; char *secure_b = NULL, *secure_e = NULL;
char *expires_b = NULL, *expires_e = NULL; char *expires_b = NULL, *expires_e = NULL;
char *name_b = NULL, *name_e = NULL; char *name_b = NULL, *name_e = NULL;
char *value_b = NULL, *value_e = NULL; char *value_b = NULL, *value_e = NULL;
SKIP_WS (p); /* Skip leading white-space. */
while (*p && ISSPACE (*p))
++p;
/* Ignore empty lines. */
if (!*p || *p == '#') if (!*p || *p == '#')
/* empty line */
continue; continue;
SET_WORD_BOUNDARIES (p, domain_b, domain_e); GET_WORD (p, domain_b, domain_e);
SET_WORD_BOUNDARIES (p, ignore_b, ignore_e); GET_WORD (p, domflag_b, domflag_e);
SET_WORD_BOUNDARIES (p, path_b, path_e); GET_WORD (p, path_b, path_e);
SET_WORD_BOUNDARIES (p, secure_b, secure_e); GET_WORD (p, secure_b, secure_e);
SET_WORD_BOUNDARIES (p, expires_b, expires_e); GET_WORD (p, expires_b, expires_e);
SET_WORD_BOUNDARIES (p, name_b, name_e); GET_WORD (p, name_b, name_e);
/* Don't use SET_WORD_BOUNDARIES for value because it may /* Don't use GET_WORD for value because it ends with newline,
contain whitespace. Instead, set value_e to the end of line, not TAB. */
modulo trailing space (this will skip the line separator.) */
SKIP_WS (p);
value_b = p; value_b = p;
value_e = p + strlen (p); value_e = p + strlen (p);
while (value_e > value_b && ISSPACE (*(value_e - 1))) if (value_e > value_b && value_e[-1] == '\n')
--value_e; --value_e;
if (value_b == value_e) if (value_e > value_b && value_e[-1] == '\r')
/* Hmm, should we check for empty value? I guess that's --value_e;
legal, so I leave it. */ /* Empty values are legal (I think), so don't bother checking. */
;
cookie = cookie_new (); cookie = cookie_new ();
cookie->attr = strdupdelim (name_b, name_e); cookie->attr = strdupdelim (name_b, name_e);
cookie->value = strdupdelim (value_b, value_e); cookie->value = strdupdelim (value_b, value_e);
cookie->path = strdupdelim (path_b, path_e); cookie->path = strdupdelim (path_b, path_e);
cookie->secure = BOUNDED_EQUAL (secure_b, secure_e, "TRUE");
if (BOUNDED_EQUAL (secure_b, secure_e, "TRUE")) /* Curl source says, quoting Andre Garcia: "flag: A TRUE/FALSE
cookie->secure = 1; value indicating if all machines within a given domain can
access the variable. This value is set automatically by the
browser, depending on the value set for the domain." */
cookie->domain_exact = !BOUNDED_EQUAL (domflag_b, domflag_e, "TRUE");
/* DOMAIN needs special treatment because we might need to /* DOMAIN needs special treatment because we might need to
extract the port. */ extract the port. */
port = domain_port (domain_b, domain_e, (const char **)&domain_e); port = domain_port (domain_b, domain_e, (const char **)&domain_e);
if (port) if (port)
cookie->port = port; cookie->port = port;
if (*domain_b == '.')
++domain_b; /* remove leading dot internally */
cookie->domain = strdupdelim (domain_b, domain_e); cookie->domain = strdupdelim (domain_b, domain_e);
/* safe default in case EXPIRES field is garbled. */ /* safe default in case EXPIRES field is garbled. */
expiry = (double)cookies_now - 1; expiry = (double)cookies_now - 1;
/* I don't like changing the line, but it's completely safe. /* I don't like changing the line, but it's safe here. (line is
(line is malloced.) */ malloced.) */
*expires_e = '\0'; *expires_e = '\0';
sscanf (expires_b, "%lf", &expiry); sscanf (expires_b, "%lf", &expiry);
if (expiry < cookies_now) if (expiry < cookies_now)
@ -1347,21 +1371,23 @@ save_cookies_mapper (void *key, void *value, void *arg)
{ {
FILE *fp = (FILE *)arg; FILE *fp = (FILE *)arg;
char *domain = (char *)key; char *domain = (char *)key;
struct cookie *chain = (struct cookie *)value; struct cookie *cookie = (struct cookie *)value;
for (; chain; chain = chain->next) for (; cookie; cookie = cookie->next)
{ {
if (!chain->permanent) if (!cookie->permanent)
continue; continue;
if (COOKIE_EXPIRED_P (chain)) if (COOKIE_EXPIRED_P (cookie))
continue; continue;
if (!cookie->domain_exact)
fputc ('.', fp);
fputs (domain, fp); fputs (domain, fp);
if (chain->port != PORT_ANY) if (cookie->port != PORT_ANY)
fprintf (fp, ":%d", chain->port); fprintf (fp, ":%d", cookie->port);
fprintf (fp, "\t%s\t%s\t%s\t%.0f\t%s\t%s\n", fprintf (fp, "\t%s\t%s\t%s\t%.0f\t%s\t%s\n",
*domain == '.' ? "TRUE" : "FALSE", cookie->domain_exact ? "FALSE" : "TRUE",
chain->path, chain->secure ? "TRUE" : "FALSE", cookie->path, cookie->secure ? "TRUE" : "FALSE",
(double)chain->expiry_time, (double)cookie->expiry_time,
chain->attr, chain->value); cookie->attr, cookie->value);
if (ferror (fp)) if (ferror (fp))
return 1; /* stop mapping */ return 1; /* stop mapping */
} }