mirror of
https://github.com/mirror/wget.git
synced 2025-04-01 15:10:58 +08:00
[svn] Committed my patches from <sxsbsw16sbu.fsf@florida.arsdigita.de>
and <sxsvgu824xk.fsf@florida.arsdigita.de>.
This commit is contained in:
parent
5f96643297
commit
0dd418242a
@ -1,3 +1,20 @@
|
|||||||
|
2000-10-31 Hrvoje Niksic <hniksic@arsdigita.com>
|
||||||
|
|
||||||
|
* ftp.c (getftp): Ditto.
|
||||||
|
|
||||||
|
* http.c (gethttp): Rewind the stream when retrying from scratch.
|
||||||
|
|
||||||
|
2000-10-31 Hrvoje Niksic <hniksic@arsdigita.com>
|
||||||
|
|
||||||
|
* retr.c (retrieve_url): Use url_concat() to handle relative
|
||||||
|
redirections instead of /ad hoc/ code.
|
||||||
|
|
||||||
|
* url.c (url_concat): New function encapsulating weird
|
||||||
|
construct().
|
||||||
|
(urllen_http_hack): New function.
|
||||||
|
(construct): When constructing new URLs, recognize that `?' does
|
||||||
|
not form part of the file name in HTTP.
|
||||||
|
|
||||||
2000-10-13 Adrian Aichner <adrian@xemacs.org>
|
2000-10-13 Adrian Aichner <adrian@xemacs.org>
|
||||||
|
|
||||||
* retr.c: Add msec timing support for WINDOWS.
|
* retr.c: Add msec timing support for WINDOWS.
|
||||||
|
29
src/ftp.c
29
src/ftp.c
@ -648,6 +648,15 @@ Error in server response, closing control connection.\n"));
|
|||||||
expected_bytes = ftp_expected_bytes (ftp_last_respline);
|
expected_bytes = ftp_expected_bytes (ftp_last_respline);
|
||||||
} /* cmd & DO_LIST */
|
} /* cmd & DO_LIST */
|
||||||
|
|
||||||
|
/* Some FTP servers return the total length of file after REST
|
||||||
|
command, others just return the remaining size. */
|
||||||
|
if (*len && restval && expected_bytes
|
||||||
|
&& (expected_bytes == *len - restval))
|
||||||
|
{
|
||||||
|
DEBUGP (("Lying FTP server found, adjusting.\n"));
|
||||||
|
expected_bytes = *len;
|
||||||
|
}
|
||||||
|
|
||||||
/* If no transmission was required, then everything is OK. */
|
/* If no transmission was required, then everything is OK. */
|
||||||
if (!(cmd & (DO_LIST | DO_RETR)))
|
if (!(cmd & (DO_LIST | DO_RETR)))
|
||||||
return RETRFINISHED;
|
return RETRFINISHED;
|
||||||
@ -685,16 +694,16 @@ Error in server response, closing control connection.\n"));
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
fp = opt.dfp;
|
{
|
||||||
|
fp = opt.dfp;
|
||||||
/* Some FTP servers return the total length of file after REST command,
|
if (!restval)
|
||||||
others just return the remaining size. */
|
{
|
||||||
if (*len && restval && expected_bytes
|
/* This will silently fail for streams that don't correspond
|
||||||
&& (expected_bytes == *len - restval))
|
to regular files, but that's OK. */
|
||||||
{
|
rewind (fp);
|
||||||
DEBUGP (("Lying FTP server found, adjusting.\n"));
|
clearerr (fp);
|
||||||
expected_bytes = *len;
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (*len)
|
if (*len)
|
||||||
{
|
{
|
||||||
|
13
src/http.c
13
src/http.c
@ -843,8 +843,17 @@ Accept: %s\r\n\
|
|||||||
return FOPENERR;
|
return FOPENERR;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
else /* opt.dfp */
|
else /* opt.dfp */
|
||||||
fp = opt.dfp;
|
{
|
||||||
|
fp = opt.dfp;
|
||||||
|
if (!hs->restval)
|
||||||
|
{
|
||||||
|
/* This will silently fail for streams that don't correspond
|
||||||
|
to regular files, but that's OK. */
|
||||||
|
rewind (fp);
|
||||||
|
clearerr (fp);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/* #### This confuses the code that checks for file size. There
|
/* #### This confuses the code that checks for file size. There
|
||||||
should be some overhead information. */
|
should be some overhead information. */
|
||||||
|
12
src/init.c
12
src/init.c
@ -76,7 +76,6 @@ CMD_DECLARE (cmd_spec_dotstyle);
|
|||||||
CMD_DECLARE (cmd_spec_header);
|
CMD_DECLARE (cmd_spec_header);
|
||||||
CMD_DECLARE (cmd_spec_htmlify);
|
CMD_DECLARE (cmd_spec_htmlify);
|
||||||
CMD_DECLARE (cmd_spec_mirror);
|
CMD_DECLARE (cmd_spec_mirror);
|
||||||
CMD_DECLARE (cmd_spec_outputdocument);
|
|
||||||
CMD_DECLARE (cmd_spec_recursive);
|
CMD_DECLARE (cmd_spec_recursive);
|
||||||
CMD_DECLARE (cmd_spec_useragent);
|
CMD_DECLARE (cmd_spec_useragent);
|
||||||
|
|
||||||
@ -139,7 +138,7 @@ static struct {
|
|||||||
{ "noparent", &opt.no_parent, cmd_boolean },
|
{ "noparent", &opt.no_parent, cmd_boolean },
|
||||||
{ "noproxy", &opt.no_proxy, cmd_vector },
|
{ "noproxy", &opt.no_proxy, cmd_vector },
|
||||||
{ "numtries", &opt.ntry, cmd_number_inf },/* deprecated*/
|
{ "numtries", &opt.ntry, cmd_number_inf },/* deprecated*/
|
||||||
{ "outputdocument", NULL, cmd_spec_outputdocument },
|
{ "outputdocument", &opt.output_document, cmd_string },
|
||||||
{ "pagerequisites", &opt.page_requisites, cmd_boolean },
|
{ "pagerequisites", &opt.page_requisites, cmd_boolean },
|
||||||
{ "passiveftp", &opt.ftp_pasv, cmd_lockable_boolean },
|
{ "passiveftp", &opt.ftp_pasv, cmd_lockable_boolean },
|
||||||
{ "passwd", &opt.ftp_pass, cmd_string },
|
{ "passwd", &opt.ftp_pass, cmd_string },
|
||||||
@ -915,15 +914,6 @@ cmd_spec_mirror (const char *com, const char *val, void *closure)
|
|||||||
return 1;
|
return 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
static int
|
|
||||||
cmd_spec_outputdocument (const char *com, const char *val, void *closure)
|
|
||||||
{
|
|
||||||
FREE_MAYBE (opt.output_document);
|
|
||||||
opt.output_document = xstrdup (val);
|
|
||||||
opt.ntry = 1;
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
static int
|
static int
|
||||||
cmd_spec_recursive (const char *com, const char *val, void *closure)
|
cmd_spec_recursive (const char *com, const char *val, void *closure)
|
||||||
{
|
{
|
||||||
|
46
src/retr.c
46
src/retr.c
@ -337,9 +337,7 @@ retrieve_url (const char *origurl, char **file, char **newloc,
|
|||||||
|
|
||||||
again:
|
again:
|
||||||
u = newurl ();
|
u = newurl ();
|
||||||
/* Parse the URL. RFC2068 requires `Location' to contain an
|
/* Parse the URL. */
|
||||||
absoluteURI, but many sites break this requirement. #### We
|
|
||||||
should be liberal and accept a relative location, too. */
|
|
||||||
result = parseurl (url, u, already_redirected);
|
result = parseurl (url, u, already_redirected);
|
||||||
if (result != URLOK)
|
if (result != URLOK)
|
||||||
{
|
{
|
||||||
@ -426,40 +424,26 @@ retrieve_url (const char *origurl, char **file, char **newloc,
|
|||||||
location_changed = (result == NEWLOCATION);
|
location_changed = (result == NEWLOCATION);
|
||||||
if (location_changed)
|
if (location_changed)
|
||||||
{
|
{
|
||||||
/* Check for redirection to oneself. */
|
if (mynewloc)
|
||||||
|
{
|
||||||
|
/* The HTTP specs only allow absolute URLs to appear in
|
||||||
|
redirects, but a ton of boneheaded webservers and CGIs
|
||||||
|
out there break the rules and use relative URLs, and
|
||||||
|
popular browsers are lenient about this, so wget should
|
||||||
|
be too. */
|
||||||
|
char *construced_newloc = url_concat (url, mynewloc);
|
||||||
|
free (mynewloc);
|
||||||
|
mynewloc = construced_newloc;
|
||||||
|
}
|
||||||
|
/* Check for redirection back to itself. */
|
||||||
if (url_equal (url, mynewloc))
|
if (url_equal (url, mynewloc))
|
||||||
{
|
{
|
||||||
logprintf (LOG_NOTQUIET, _("%s: Redirection to itself.\n"),
|
logprintf (LOG_NOTQUIET, _("%s: Redirection to itself.\n"),
|
||||||
mynewloc);
|
mynewloc);
|
||||||
return WRONGCODE;
|
return WRONGCODE;
|
||||||
}
|
}
|
||||||
if (mynewloc)
|
free (url);
|
||||||
{
|
url = mynewloc;
|
||||||
/* The HTTP specs only allow absolute URLs to appear in redirects, but
|
|
||||||
a ton of boneheaded webservers and CGIs out there break the rules
|
|
||||||
and use relative URLs, and popular browsers are lenient about this,
|
|
||||||
so wget should be too. */
|
|
||||||
if (strstr(mynewloc, "://") == NULL)
|
|
||||||
/* Doesn't look like an absolute URL (this check will incorrectly
|
|
||||||
think that rare relative URLs containing "://" later in the
|
|
||||||
string are absolute). */
|
|
||||||
{
|
|
||||||
char *temp = malloc(strlen(url) + strlen(mynewloc) + 1);
|
|
||||||
|
|
||||||
if (mynewloc[0] == '/')
|
|
||||||
/* "Hostless absolute" URL. Convert to absolute. */
|
|
||||||
sprintf(temp,"%s%s", url, mynewloc);
|
|
||||||
else
|
|
||||||
/* Relative URL. Convert to absolute. */
|
|
||||||
sprintf(temp,"%s/%s", url, mynewloc);
|
|
||||||
|
|
||||||
free(mynewloc);
|
|
||||||
mynewloc = temp;
|
|
||||||
}
|
|
||||||
|
|
||||||
free (url);
|
|
||||||
url = mynewloc;
|
|
||||||
}
|
|
||||||
freeurl (u, 1);
|
freeurl (u, 1);
|
||||||
already_redirected = 1;
|
already_redirected = 1;
|
||||||
goto again;
|
goto again;
|
||||||
|
32
src/url.c
32
src/url.c
@ -1266,6 +1266,23 @@ url_filename (const struct urlinfo *u)
|
|||||||
return name;
|
return name;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Like strlen(), except if `?' is present in the URL and its protocol
   is HTTP or HTTPS, act as if `?' is the end of the string.  Needed
   for the correct implementation of `construct' below, at least until
   we code up proper parsing of URLs.

   URL must be a non-NULL, NUL-terminated string.  Returns the offset
   of the first `?' for http:// and https:// URLs that contain one,
   and the full string length otherwise.  */
static int
urllen_http_hack (const char *url)
{
  /* Compare the whole scheme prefix: "https://" is 8 characters long,
     so a length of 7 would also accept the malformed "https:/".  */
  if (!strncmp (url, "http://", 7)
      || !strncmp (url, "https://", 8))
    {
      const char *q = strchr (url, '?');
      if (q)
        return (int) (q - url);
    }
  return (int) strlen (url);
}
|
||||||
|
|
||||||
/* Construct an absolute URL, given a (possibly) relative one. This
|
/* Construct an absolute URL, given a (possibly) relative one. This
|
||||||
is more tricky than it might seem, but it works. */
|
is more tricky than it might seem, but it works. */
|
||||||
static char *
|
static char *
|
||||||
@ -1279,12 +1296,12 @@ construct (const char *url, const char *sub, int subsize, int no_proto)
|
|||||||
|
|
||||||
if (*sub != '/')
|
if (*sub != '/')
|
||||||
{
|
{
|
||||||
for (i = strlen (url); i && url[i] != '/'; i--);
|
for (i = urllen_http_hack (url); i && url[i] != '/'; i--);
|
||||||
if (!i || (url[i] == url[i - 1]))
|
if (!i || (url[i] == url[i - 1]))
|
||||||
{
|
{
|
||||||
int l = strlen (url);
|
int l = urllen_http_hack (url);
|
||||||
char *t = (char *)alloca (l + 2);
|
char *t = (char *)alloca (l + 2);
|
||||||
strcpy (t, url);
|
memcpy (t, url, l);
|
||||||
t[l] = '/';
|
t[l] = '/';
|
||||||
t[l + 1] = '\0';
|
t[l + 1] = '\0';
|
||||||
url = t;
|
url = t;
|
||||||
@ -1312,7 +1329,7 @@ construct (const char *url, const char *sub, int subsize, int no_proto)
|
|||||||
while (fl);
|
while (fl);
|
||||||
if (!url[i])
|
if (!url[i])
|
||||||
{
|
{
|
||||||
int l = strlen (url);
|
int l = urllen_http_hack (url);
|
||||||
char *t = (char *)alloca (l + 2);
|
char *t = (char *)alloca (l + 2);
|
||||||
strcpy (t, url);
|
strcpy (t, url);
|
||||||
t[l] = '/';
|
t[l] = '/';
|
||||||
@ -1334,6 +1351,13 @@ construct (const char *url, const char *sub, int subsize, int no_proto)
|
|||||||
}
|
}
|
||||||
return constr;
|
return constr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Convenience front end to construct(): combine NEW_URL with BASE_URL
   and return whatever construct() yields.  NEW_URL is passed in full
   (its strlen), and construct() is told that the link lacks a
   protocol exactly when has_proto() reports none for NEW_URL.  */
char *
url_concat (const char *base_url, const char *new_url)
{
  int new_len = strlen (new_url);
  int lacks_proto = !has_proto (new_url);

  return construct (base_url, new_url, new_len, lacks_proto);
}
|
||||||
|
|
||||||
/* Optimize URL by host, destructively replacing u->host with realhost
|
/* Optimize URL by host, destructively replacing u->host with realhost
|
||||||
(u->host). Do this regardless of opt.simple_check. */
|
(u->host). Do this regardless of opt.simple_check. */
|
||||||
|
@ -98,6 +98,8 @@ urlpos *get_urls_file PARAMS ((const char *));
|
|||||||
urlpos *get_urls_html PARAMS ((const char *, const char *, int, int));
|
urlpos *get_urls_html PARAMS ((const char *, const char *, int, int));
|
||||||
void free_urlpos PARAMS ((urlpos *));
|
void free_urlpos PARAMS ((urlpos *));
|
||||||
|
|
||||||
|
char *url_concat PARAMS ((const char *, const char *));
|
||||||
|
|
||||||
void rotate_backups PARAMS ((const char *));
|
void rotate_backups PARAMS ((const char *));
|
||||||
int mkalldirs PARAMS ((const char *));
|
int mkalldirs PARAMS ((const char *));
|
||||||
char *url_filename PARAMS ((const struct urlinfo *));
|
char *url_filename PARAMS ((const struct urlinfo *));
|
||||||
|
Loading…
Reference in New Issue
Block a user