mirror of
https://github.com/mirror/wget.git
synced 2025-01-08 19:30:41 +08:00
[svn] Fix URL parsing bugs.
This commit is contained in:
parent
80080604f4
commit
f65e63492b
@ -1,3 +1,10 @@
|
|||||||
|
2005-07-01 Hrvoje Niksic <hniksic@xemacs.org>
|
||||||
|
|
||||||
|
* url.c (url_parse): Make sure u->params is not initialized for
|
||||||
|
http/https URLs.
|
||||||
|
(url_parse): Don't crash on garbage following []-delimited IPv6
|
||||||
|
addresses.
|
||||||
|
|
||||||
2005-07-01 Hrvoje Niksic <hniksic@xemacs.org>
|
2005-07-01 Hrvoje Niksic <hniksic@xemacs.org>
|
||||||
|
|
||||||
* main.c (print_help): Don't refer to the non-existent -nr in
|
* main.c (print_help): Don't refer to the non-existent -nr in
|
||||||
|
110
src/url.c
110
src/url.c
@ -45,20 +45,29 @@ so, delete this exception statement from your version. */
|
|||||||
|
|
||||||
struct scheme_data
|
struct scheme_data
|
||||||
{
|
{
|
||||||
|
/* Short name of the scheme, such as "http" or "ftp". */
|
||||||
const char *name;
|
const char *name;
|
||||||
|
/* Leading string that identifies the scheme, such as "https://". */
|
||||||
const char *leading_string;
|
const char *leading_string;
|
||||||
|
/* Default port of the scheme when none is specified. */
|
||||||
int default_port;
|
int default_port;
|
||||||
bool enabled;
|
/* Used for disabling https when OpenSSL fails to init. */
|
||||||
|
bool disabled;
|
||||||
|
/* Allowed separators, handled by url_parse. For example, ftp
|
||||||
|
doesn't support the "?query", and http/https don't support
|
||||||
|
";params". All schemes must support at least "/:". */
|
||||||
|
const char *separators;
|
||||||
|
int flags;
|
||||||
};
|
};
|
||||||
|
|
||||||
/* Supported schemes: */
|
/* Supported schemes: */
|
||||||
static struct scheme_data supported_schemes[] =
|
static struct scheme_data supported_schemes[] =
|
||||||
{
|
{
|
||||||
{ "http", "http://", DEFAULT_HTTP_PORT, 1 },
|
{ "http", "http://", DEFAULT_HTTP_PORT, false, "/:?#" },
|
||||||
#ifdef HAVE_SSL
|
#ifdef HAVE_SSL
|
||||||
{ "https", "https://", DEFAULT_HTTPS_PORT, 1 },
|
{ "https", "https://", DEFAULT_HTTPS_PORT, false, "/:?#" },
|
||||||
#endif
|
#endif
|
||||||
{ "ftp", "ftp://", DEFAULT_FTP_PORT, 1 },
|
{ "ftp", "ftp://", DEFAULT_FTP_PORT, false, "/:;#" },
|
||||||
|
|
||||||
/* SCHEME_INVALID */
|
/* SCHEME_INVALID */
|
||||||
{ NULL, NULL, -1, 0 }
|
{ NULL, NULL, -1, 0 }
|
||||||
@ -404,7 +413,7 @@ url_scheme (const char *url)
|
|||||||
if (0 == strncasecmp (url, supported_schemes[i].leading_string,
|
if (0 == strncasecmp (url, supported_schemes[i].leading_string,
|
||||||
strlen (supported_schemes[i].leading_string)))
|
strlen (supported_schemes[i].leading_string)))
|
||||||
{
|
{
|
||||||
if (supported_schemes[i].enabled)
|
if (!(supported_schemes[i].disabled))
|
||||||
return (enum url_scheme) i;
|
return (enum url_scheme) i;
|
||||||
else
|
else
|
||||||
return SCHEME_INVALID;
|
return SCHEME_INVALID;
|
||||||
@ -444,7 +453,7 @@ scheme_default_port (enum url_scheme scheme)
|
|||||||
void
|
void
|
||||||
scheme_disable (enum url_scheme scheme)
|
scheme_disable (enum url_scheme scheme)
|
||||||
{
|
{
|
||||||
supported_schemes[scheme].enabled = false;
|
supported_schemes[scheme].disabled = true;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Skip the username and password, if present in the URL. The
|
/* Skip the username and password, if present in the URL. The
|
||||||
@ -617,8 +626,8 @@ static const char *parse_errors[] = {
|
|||||||
N_("No error"),
|
N_("No error"),
|
||||||
#define PE_UNSUPPORTED_SCHEME 1
|
#define PE_UNSUPPORTED_SCHEME 1
|
||||||
N_("Unsupported scheme"),
|
N_("Unsupported scheme"),
|
||||||
#define PE_EMPTY_HOST 2
|
#define PE_INVALID_HOST_NAME 2
|
||||||
N_("Empty host"),
|
N_("Invalid host name"),
|
||||||
#define PE_BAD_PORT_NUMBER 3
|
#define PE_BAD_PORT_NUMBER 3
|
||||||
N_("Bad port number"),
|
N_("Bad port number"),
|
||||||
#define PE_INVALID_USER_NAME 4
|
#define PE_INVALID_USER_NAME 4
|
||||||
@ -644,6 +653,7 @@ url_parse (const char *url, int *error)
|
|||||||
bool path_modified, host_modified;
|
bool path_modified, host_modified;
|
||||||
|
|
||||||
enum url_scheme scheme;
|
enum url_scheme scheme;
|
||||||
|
const char *seps;
|
||||||
|
|
||||||
const char *uname_b, *uname_e;
|
const char *uname_b, *uname_e;
|
||||||
const char *host_b, *host_e;
|
const char *host_b, *host_e;
|
||||||
@ -682,10 +692,16 @@ url_parse (const char *url, int *error)
|
|||||||
|
|
||||||
scheme://host[:port][/path][;params][?query][#fragment] */
|
scheme://host[:port][/path][;params][?query][#fragment] */
|
||||||
|
|
||||||
|
path_b = path_e = NULL;
|
||||||
params_b = params_e = NULL;
|
params_b = params_e = NULL;
|
||||||
query_b = query_e = NULL;
|
query_b = query_e = NULL;
|
||||||
fragment_b = fragment_e = NULL;
|
fragment_b = fragment_e = NULL;
|
||||||
|
|
||||||
|
/* Initialize separators for optional parts of URL, depending on the
|
||||||
|
scheme. For example, FTP has params, and HTTP and HTTPS have
|
||||||
|
query string and fragment. */
|
||||||
|
seps = supported_schemes[scheme].separators;
|
||||||
|
|
||||||
host_b = p;
|
host_b = p;
|
||||||
|
|
||||||
if (*p == '[')
|
if (*p == '[')
|
||||||
@ -718,16 +734,28 @@ url_parse (const char *url, int *error)
|
|||||||
error_code = PE_IPV6_NOT_SUPPORTED;
|
error_code = PE_IPV6_NOT_SUPPORTED;
|
||||||
goto error;
|
goto error;
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
/* The closing bracket must be followed by a separator or by the
|
||||||
|
null char. */
|
||||||
|
/* http://[::1]... */
|
||||||
|
/* ^ */
|
||||||
|
if (!strchr (seps, *p))
|
||||||
|
{
|
||||||
|
/* Trailing garbage after []-delimited IPv6 address. */
|
||||||
|
error_code = PE_INVALID_HOST_NAME;
|
||||||
|
goto error;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
p = strpbrk_or_eos (p, ":/;?#");
|
p = strpbrk_or_eos (p, seps);
|
||||||
host_e = p;
|
host_e = p;
|
||||||
}
|
}
|
||||||
|
++seps; /* advance to '/' */
|
||||||
|
|
||||||
if (host_b == host_e)
|
if (host_b == host_e)
|
||||||
{
|
{
|
||||||
error_code = PE_EMPTY_HOST;
|
error_code = PE_INVALID_HOST_NAME;
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -740,12 +768,11 @@ url_parse (const char *url, int *error)
|
|||||||
/* ^ */
|
/* ^ */
|
||||||
++p;
|
++p;
|
||||||
port_b = p;
|
port_b = p;
|
||||||
p = strpbrk_or_eos (p, "/;?#");
|
p = strpbrk_or_eos (p, seps);
|
||||||
port_e = p;
|
port_e = p;
|
||||||
|
|
||||||
/* Allow empty port, as per rfc2396. */
|
/* Allow empty port, as per rfc2396. */
|
||||||
if (port_b != port_e)
|
if (port_b != port_e)
|
||||||
{
|
|
||||||
for (port = 0, pp = port_b; pp < port_e; pp++)
|
for (port = 0, pp = port_b; pp < port_e; pp++)
|
||||||
{
|
{
|
||||||
if (!ISDIGIT (*pp))
|
if (!ISDIGIT (*pp))
|
||||||
@ -758,58 +785,31 @@ url_parse (const char *url, int *error)
|
|||||||
port = 10 * port + (*pp - '0');
|
port = 10 * port + (*pp - '0');
|
||||||
/* Check for too large port numbers here, before we have
|
/* Check for too large port numbers here, before we have
|
||||||
a chance to overflow on bogus port values. */
|
a chance to overflow on bogus port values. */
|
||||||
if (port > 65535)
|
if (port > 0xffff)
|
||||||
{
|
{
|
||||||
error_code = PE_BAD_PORT_NUMBER;
|
error_code = PE_BAD_PORT_NUMBER;
|
||||||
goto error;
|
goto error;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
/* Advance to the first separator *after* '/' (either ';' or '?',
|
||||||
|
depending on the scheme). */
|
||||||
|
++seps;
|
||||||
|
|
||||||
if (*p == '/')
|
/* Get the optional parts of URL, each part being delimited by
|
||||||
{
|
current location and the position of the next separator. */
|
||||||
++p;
|
#define GET_URL_PART(sepchar, var) do { \
|
||||||
path_b = p;
|
if (*p == sepchar) \
|
||||||
p = strpbrk_or_eos (p, ";?#");
|
var##_b = ++p, var##_e = p = strpbrk_or_eos (p, seps); \
|
||||||
path_e = p;
|
++seps; \
|
||||||
}
|
} while (0)
|
||||||
else
|
|
||||||
{
|
|
||||||
/* Path is not allowed not to exist. */
|
|
||||||
path_b = path_e = p;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (*p == ';')
|
GET_URL_PART ('/', path);
|
||||||
{
|
GET_URL_PART (';', params);
|
||||||
++p;
|
GET_URL_PART ('?', query);
|
||||||
params_b = p;
|
GET_URL_PART ('#', fragment);
|
||||||
p = strpbrk_or_eos (p, "?#");
|
|
||||||
params_e = p;
|
|
||||||
}
|
|
||||||
if (*p == '?')
|
|
||||||
{
|
|
||||||
++p;
|
|
||||||
query_b = p;
|
|
||||||
p = strpbrk_or_eos (p, "#");
|
|
||||||
query_e = p;
|
|
||||||
|
|
||||||
/* Hack that allows users to use '?' (a wildcard character) in
|
#undef GET_URL_PART
|
||||||
FTP URLs without it being interpreted as a query string
|
|
||||||
delimiter. */
|
|
||||||
if (scheme == SCHEME_FTP)
|
|
||||||
{
|
|
||||||
query_b = query_e = NULL;
|
|
||||||
path_e = p;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if (*p == '#')
|
|
||||||
{
|
|
||||||
++p;
|
|
||||||
fragment_b = p;
|
|
||||||
p += strlen (p);
|
|
||||||
fragment_e = p;
|
|
||||||
}
|
|
||||||
assert (*p == 0);
|
assert (*p == 0);
|
||||||
|
|
||||||
if (uname_b != uname_e)
|
if (uname_b != uname_e)
|
||||||
|
Loading…
Reference in New Issue
Block a user