[svn] Committed my patches from <sxsbsw16sbu.fsf@florida.arsdigita.de>

and <sxsvgu824xk.fsf@florida.arsdigita.de>.
This commit is contained in:
hniksic 2000-10-31 11:25:32 -08:00
parent 5f96643297
commit 0dd418242a
7 changed files with 93 additions and 58 deletions

View File

@ -1,3 +1,20 @@
2000-10-31 Hrvoje Niksic <hniksic@arsdigita.com>
* ftp.c (getftp): Ditto (rewind the stream when retrying from scratch).
* http.c (gethttp): Rewind the stream when retrying from scratch.
2000-10-31 Hrvoje Niksic <hniksic@arsdigita.com>
* retr.c (retrieve_url): Use url_concat() to handle relative
redirections instead of /ad hoc/ code.
* url.c (url_concat): New function encapsulating weird
construct().
(urllen_http_hack): New function.
(construct): When constructing new URLs, recognize that `?' does
not form part of the file name in HTTP.
2000-10-13 Adrian Aichner <adrian@xemacs.org>
* retr.c: Add msec timing support for WINDOWS.

View File

@ -648,6 +648,15 @@ Error in server response, closing control connection.\n"));
expected_bytes = ftp_expected_bytes (ftp_last_respline); expected_bytes = ftp_expected_bytes (ftp_last_respline);
} /* cmd & DO_LIST */ } /* cmd & DO_LIST */
/* Some FTP servers return the total length of file after REST
command, others just return the remaining size. */
if (*len && restval && expected_bytes
&& (expected_bytes == *len - restval))
{
DEBUGP (("Lying FTP server found, adjusting.\n"));
expected_bytes = *len;
}
/* If no transmission was required, then everything is OK. */ /* If no transmission was required, then everything is OK. */
if (!(cmd & (DO_LIST | DO_RETR))) if (!(cmd & (DO_LIST | DO_RETR)))
return RETRFINISHED; return RETRFINISHED;
@ -685,16 +694,16 @@ Error in server response, closing control connection.\n"));
} }
} }
else else
fp = opt.dfp; {
fp = opt.dfp;
/* Some FTP servers return the total length of file after REST command, if (!restval)
others just return the remaining size. */ {
if (*len && restval && expected_bytes /* This will silently fail for streams that don't correspond
&& (expected_bytes == *len - restval)) to regular files, but that's OK. */
{ rewind (fp);
DEBUGP (("Lying FTP server found, adjusting.\n")); clearerr (fp);
expected_bytes = *len; }
} }
if (*len) if (*len)
{ {

View File

@ -843,8 +843,17 @@ Accept: %s\r\n\
return FOPENERR; return FOPENERR;
} }
} }
else /* opt.dfp */ else /* opt.dfp */
fp = opt.dfp; {
fp = opt.dfp;
if (!hs->restval)
{
/* This will silently fail for streams that don't correspond
to regular files, but that's OK. */
rewind (fp);
clearerr (fp);
}
}
/* #### This confuses the code that checks for file size. There /* #### This confuses the code that checks for file size. There
should be some overhead information. */ should be some overhead information. */

View File

@ -76,7 +76,6 @@ CMD_DECLARE (cmd_spec_dotstyle);
CMD_DECLARE (cmd_spec_header); CMD_DECLARE (cmd_spec_header);
CMD_DECLARE (cmd_spec_htmlify); CMD_DECLARE (cmd_spec_htmlify);
CMD_DECLARE (cmd_spec_mirror); CMD_DECLARE (cmd_spec_mirror);
CMD_DECLARE (cmd_spec_outputdocument);
CMD_DECLARE (cmd_spec_recursive); CMD_DECLARE (cmd_spec_recursive);
CMD_DECLARE (cmd_spec_useragent); CMD_DECLARE (cmd_spec_useragent);
@ -139,7 +138,7 @@ static struct {
{ "noparent", &opt.no_parent, cmd_boolean }, { "noparent", &opt.no_parent, cmd_boolean },
{ "noproxy", &opt.no_proxy, cmd_vector }, { "noproxy", &opt.no_proxy, cmd_vector },
{ "numtries", &opt.ntry, cmd_number_inf },/* deprecated*/ { "numtries", &opt.ntry, cmd_number_inf },/* deprecated*/
{ "outputdocument", NULL, cmd_spec_outputdocument }, { "outputdocument", &opt.output_document, cmd_string },
{ "pagerequisites", &opt.page_requisites, cmd_boolean }, { "pagerequisites", &opt.page_requisites, cmd_boolean },
{ "passiveftp", &opt.ftp_pasv, cmd_lockable_boolean }, { "passiveftp", &opt.ftp_pasv, cmd_lockable_boolean },
{ "passwd", &opt.ftp_pass, cmd_string }, { "passwd", &opt.ftp_pass, cmd_string },
@ -915,15 +914,6 @@ cmd_spec_mirror (const char *com, const char *val, void *closure)
return 1; return 1;
} }
/* Handler for the `outputdocument' command.  Forces the retry count
   (opt.ntry) to 1 and makes opt.output_document a fresh copy of VAL,
   releasing whatever string was stored there before.  COM and CLOSURE
   are not used.  Always returns 1.  */
static int
cmd_spec_outputdocument (const char *com, const char *val, void *closure)
{
  opt.ntry = 1;

  /* Drop the old name before installing the new copy.  */
  FREE_MAYBE (opt.output_document);
  opt.output_document = xstrdup (val);

  return 1;
}
static int static int
cmd_spec_recursive (const char *com, const char *val, void *closure) cmd_spec_recursive (const char *com, const char *val, void *closure)
{ {

View File

@ -337,9 +337,7 @@ retrieve_url (const char *origurl, char **file, char **newloc,
again: again:
u = newurl (); u = newurl ();
/* Parse the URL. RFC2068 requires `Location' to contain an /* Parse the URL. */
absoluteURI, but many sites break this requirement. #### We
should be liberal and accept a relative location, too. */
result = parseurl (url, u, already_redirected); result = parseurl (url, u, already_redirected);
if (result != URLOK) if (result != URLOK)
{ {
@ -426,40 +424,26 @@ retrieve_url (const char *origurl, char **file, char **newloc,
location_changed = (result == NEWLOCATION); location_changed = (result == NEWLOCATION);
if (location_changed) if (location_changed)
{ {
/* Check for redirection to oneself. */ if (mynewloc)
{
/* The HTTP specs only allow absolute URLs to appear in
redirects, but a ton of boneheaded webservers and CGIs
out there break the rules and use relative URLs, and
popular browsers are lenient about this, so wget should
be too. */
char *construced_newloc = url_concat (url, mynewloc);
free (mynewloc);
mynewloc = construced_newloc;
}
/* Check for redirection back to itself. */
if (url_equal (url, mynewloc)) if (url_equal (url, mynewloc))
{ {
logprintf (LOG_NOTQUIET, _("%s: Redirection to itself.\n"), logprintf (LOG_NOTQUIET, _("%s: Redirection to itself.\n"),
mynewloc); mynewloc);
return WRONGCODE; return WRONGCODE;
} }
if (mynewloc) free (url);
{ url = mynewloc;
/* The HTTP specs only allow absolute URLs to appear in redirects, but
a ton of boneheaded webservers and CGIs out there break the rules
and use relative URLs, and popular browsers are lenient about this,
so wget should be too. */
if (strstr(mynewloc, "://") == NULL)
/* Doesn't look like an absolute URL (this check will incorrectly
think that rare relative URLs containing "://" later in the
string are absolute). */
{
char *temp = malloc(strlen(url) + strlen(mynewloc) + 1);
if (mynewloc[0] == '/')
/* "Hostless absolute" URL. Convert to absolute. */
sprintf(temp,"%s%s", url, mynewloc);
else
/* Relative URL. Convert to absolute. */
sprintf(temp,"%s/%s", url, mynewloc);
free(mynewloc);
mynewloc = temp;
}
free (url);
url = mynewloc;
}
freeurl (u, 1); freeurl (u, 1);
already_redirected = 1; already_redirected = 1;
goto again; goto again;

View File

@ -1266,6 +1266,23 @@ url_filename (const struct urlinfo *u)
return name; return name;
} }
/* Like strlen(), except if `?' is present in the URL and its protocol
   is HTTP or HTTPS, act as if `?' is the end of the string.  Needed
   for the correct implementation of `construct' below, at least until
   we code up proper parsing of URLs.

   URL must be a non-NULL, NUL-terminated string.  The protocol is
   recognized only by a literal, case-sensitive "http://" or
   "https://" prefix; for any other string the full length is
   returned.  */
static int
urllen_http_hack (const char *url)
{
  /* Compare the complete scheme prefixes: 7 characters for "http://"
     and 8 for "https://", so that e.g. "https:/x" is not mistaken
     for an HTTPS URL.  */
  if (!strncmp (url, "http://", 7)
      || !strncmp (url, "https://", 8))
    {
      const char *q = strchr (url, '?');
      if (q)
        return (int) (q - url);
    }
  return (int) strlen (url);
}
/* Construct an absolute URL, given a (possibly) relative one. This /* Construct an absolute URL, given a (possibly) relative one. This
is more tricky than it might seem, but it works. */ is more tricky than it might seem, but it works. */
static char * static char *
@ -1279,12 +1296,12 @@ construct (const char *url, const char *sub, int subsize, int no_proto)
if (*sub != '/') if (*sub != '/')
{ {
for (i = strlen (url); i && url[i] != '/'; i--); for (i = urllen_http_hack (url); i && url[i] != '/'; i--);
if (!i || (url[i] == url[i - 1])) if (!i || (url[i] == url[i - 1]))
{ {
int l = strlen (url); int l = urllen_http_hack (url);
char *t = (char *)alloca (l + 2); char *t = (char *)alloca (l + 2);
strcpy (t, url); memcpy (t, url, l);
t[l] = '/'; t[l] = '/';
t[l + 1] = '\0'; t[l + 1] = '\0';
url = t; url = t;
@ -1312,7 +1329,7 @@ construct (const char *url, const char *sub, int subsize, int no_proto)
while (fl); while (fl);
if (!url[i]) if (!url[i])
{ {
int l = strlen (url); int l = urllen_http_hack (url);
char *t = (char *)alloca (l + 2); char *t = (char *)alloca (l + 2);
strcpy (t, url); strcpy (t, url);
t[l] = '/'; t[l] = '/';
@ -1334,6 +1351,13 @@ construct (const char *url, const char *sub, int subsize, int no_proto)
} }
return constr; return constr;
} }
/* Convenience front end to construct(): combine BASE_URL with NEW_URL
   and return what construct() returns.  The length of NEW_URL and the
   "no protocol" flag (true when has_proto() does not recognize a
   protocol in NEW_URL) are computed here so callers need not supply
   them.  */
char *
url_concat (const char *base_url, const char *new_url)
{
  int new_len = strlen (new_url);
  int lacks_proto = !has_proto (new_url);

  return construct (base_url, new_url, new_len, lacks_proto);
}
/* Optimize URL by host, destructively replacing u->host with realhost /* Optimize URL by host, destructively replacing u->host with realhost
(u->host). Do this regardless of opt.simple_check. */ (u->host). Do this regardless of opt.simple_check. */

View File

@ -98,6 +98,8 @@ urlpos *get_urls_file PARAMS ((const char *));
urlpos *get_urls_html PARAMS ((const char *, const char *, int, int)); urlpos *get_urls_html PARAMS ((const char *, const char *, int, int));
void free_urlpos PARAMS ((urlpos *)); void free_urlpos PARAMS ((urlpos *));
char *url_concat PARAMS ((const char *, const char *));
void rotate_backups PARAMS ((const char *)); void rotate_backups PARAMS ((const char *));
int mkalldirs PARAMS ((const char *)); int mkalldirs PARAMS ((const char *));
char *url_filename PARAMS ((const struct urlinfo *)); char *url_filename PARAMS ((const struct urlinfo *));