Mirror of https://github.com/mirror/wget.git (synced 2025-01-19 00:30:22 +08:00)
IRI requirement: do not percent-encode already percent-encoded values (try1)
This commit is contained in:
parent 523c3dfcbc
commit 66dd4bda74
@ -288,7 +288,7 @@ append_url (const char *link_uri, int position, int size,
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
url = url_parse (link_uri, NULL, NULL);
|
url = url_parse (link_uri, NULL, NULL, false);
|
||||||
if (!url)
|
if (!url)
|
||||||
{
|
{
|
||||||
DEBUGP (("%s: link \"%s\" doesn't parse.\n",
|
DEBUGP (("%s: link \"%s\" doesn't parse.\n",
|
||||||
@ -307,7 +307,7 @@ append_url (const char *link_uri, int position, int size,
|
|||||||
DEBUGP (("%s: merge(\"%s\", \"%s\") -> %s\n",
|
DEBUGP (("%s: merge(\"%s\", \"%s\") -> %s\n",
|
||||||
ctx->document_file, base, link_uri, complete_uri));
|
ctx->document_file, base, link_uri, complete_uri));
|
||||||
|
|
||||||
url = url_parse (complete_uri, NULL, NULL);
|
url = url_parse (complete_uri, NULL, NULL, false);
|
||||||
if (!url)
|
if (!url)
|
||||||
{
|
{
|
||||||
DEBUGP (("%s: merged link \"%s\" doesn't parse.\n",
|
DEBUGP (("%s: merged link \"%s\" doesn't parse.\n",
|
||||||
@ -752,7 +752,7 @@ get_urls_file (const char *file)
|
|||||||
url_text = merged;
|
url_text = merged;
|
||||||
}
|
}
|
||||||
|
|
||||||
url = url_parse (url_text, &up_error_code, NULL);
|
url = url_parse (url_text, &up_error_code, NULL, false);
|
||||||
if (!url)
|
if (!url)
|
||||||
{
|
{
|
||||||
char *error = url_error (url_text, up_error_code);
|
char *error = url_error (url_text, up_error_code);
|
||||||
|
@ -298,6 +298,7 @@ iri_new (void)
|
|||||||
struct iri *i = xmalloc (sizeof (struct iri));
|
struct iri *i = xmalloc (sizeof (struct iri));
|
||||||
i->uri_encoding = opt.encoding_remote ? xstrdup (opt.encoding_remote) : NULL;
|
i->uri_encoding = opt.encoding_remote ? xstrdup (opt.encoding_remote) : NULL;
|
||||||
i->content_encoding = NULL;
|
i->content_encoding = NULL;
|
||||||
|
i->orig_url = NULL;
|
||||||
i->utf8_encode = opt.enable_iri;
|
i->utf8_encode = opt.enable_iri;
|
||||||
return i;
|
return i;
|
||||||
}
|
}
|
||||||
@ -308,6 +309,7 @@ iri_free (struct iri *i)
|
|||||||
{
|
{
|
||||||
xfree_null (i->uri_encoding);
|
xfree_null (i->uri_encoding);
|
||||||
xfree_null (i->content_encoding);
|
xfree_null (i->content_encoding);
|
||||||
|
xfree_null (i->orig_url);
|
||||||
xfree (i);
|
xfree (i);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -33,6 +33,7 @@ as that of the covered work. */
|
|||||||
struct iri {
|
struct iri {
|
||||||
char *uri_encoding; /* Encoding of the uri to fetch */
|
char *uri_encoding; /* Encoding of the uri to fetch */
|
||||||
char *content_encoding; /* Encoding of links inside the fetched file */
|
char *content_encoding; /* Encoding of links inside the fetched file */
|
||||||
|
char *orig_url; /* */
|
||||||
bool utf8_encode; /* Will/Is the current url encoded in utf8 */
|
bool utf8_encode; /* Will/Is the current url encoded in utf8 */
|
||||||
};
|
};
|
||||||
|
|
||||||
|
@ -214,7 +214,7 @@ retrieve_tree (const char *start_url, struct iri *pi)
|
|||||||
set_uri_encoding (i, opt.locale, true);
|
set_uri_encoding (i, opt.locale, true);
|
||||||
#undef COPYSTR
|
#undef COPYSTR
|
||||||
|
|
||||||
start_url_parsed = url_parse (start_url, &up_error_code, i);
|
start_url_parsed = url_parse (start_url, &up_error_code, i, true);
|
||||||
if (!start_url_parsed)
|
if (!start_url_parsed)
|
||||||
{
|
{
|
||||||
char *error = url_error (start_url, up_error_code);
|
char *error = url_error (start_url, up_error_code);
|
||||||
@ -381,7 +381,7 @@ retrieve_tree (const char *start_url, struct iri *pi)
|
|||||||
if (children)
|
if (children)
|
||||||
{
|
{
|
||||||
struct urlpos *child = children;
|
struct urlpos *child = children;
|
||||||
struct url *url_parsed = url_parse (url, NULL, i);
|
struct url *url_parsed = url_parse (url, NULL, i, false);
|
||||||
struct iri *ci;
|
struct iri *ci;
|
||||||
char *referer_url = url;
|
char *referer_url = url;
|
||||||
bool strip_auth = (url_parsed != NULL
|
bool strip_auth = (url_parsed != NULL
|
||||||
@ -694,10 +694,10 @@ descend_redirect_p (const char *redirected, const char *original, int depth,
|
|||||||
struct urlpos *upos;
|
struct urlpos *upos;
|
||||||
bool success;
|
bool success;
|
||||||
|
|
||||||
orig_parsed = url_parse (original, NULL, NULL);
|
orig_parsed = url_parse (original, NULL, NULL, false);
|
||||||
assert (orig_parsed != NULL);
|
assert (orig_parsed != NULL);
|
||||||
|
|
||||||
new_parsed = url_parse (redirected, NULL, NULL);
|
new_parsed = url_parse (redirected, NULL, NULL, false);
|
||||||
assert (new_parsed != NULL);
|
assert (new_parsed != NULL);
|
||||||
|
|
||||||
upos = xnew0 (struct urlpos);
|
upos = xnew0 (struct urlpos);
|
||||||
|
src/retr.c (13 changed lines)
@ -626,7 +626,7 @@ retrieve_url (const char *origurl, char **file, char **newloc,
|
|||||||
*file = NULL;
|
*file = NULL;
|
||||||
|
|
||||||
second_try:
|
second_try:
|
||||||
u = url_parse (url, &up_error_code, iri);
|
u = url_parse (url, &up_error_code, iri, true);
|
||||||
if (!u)
|
if (!u)
|
||||||
{
|
{
|
||||||
char *error = url_error (url, up_error_code);
|
char *error = url_error (url, up_error_code);
|
||||||
@ -658,7 +658,7 @@ retrieve_url (const char *origurl, char **file, char **newloc,
|
|||||||
pi->utf8_encode = false;
|
pi->utf8_encode = false;
|
||||||
|
|
||||||
/* Parse the proxy URL. */
|
/* Parse the proxy URL. */
|
||||||
proxy_url = url_parse (proxy, &up_error_code, NULL);
|
proxy_url = url_parse (proxy, &up_error_code, NULL, true);
|
||||||
if (!proxy_url)
|
if (!proxy_url)
|
||||||
{
|
{
|
||||||
char *error = url_error (proxy, up_error_code);
|
char *error = url_error (proxy, up_error_code);
|
||||||
@ -739,9 +739,10 @@ retrieve_url (const char *origurl, char **file, char **newloc,
|
|||||||
the content encoding. */
|
the content encoding. */
|
||||||
iri->utf8_encode = opt.enable_iri;
|
iri->utf8_encode = opt.enable_iri;
|
||||||
set_content_encoding (iri, NULL);
|
set_content_encoding (iri, NULL);
|
||||||
|
xfree_null (iri->orig_url);
|
||||||
|
|
||||||
/* Now, see if this new location makes sense. */
|
/* Now, see if this new location makes sense. */
|
||||||
newloc_parsed = url_parse (mynewloc, &up_error_code, iri);
|
newloc_parsed = url_parse (mynewloc, &up_error_code, iri, true);
|
||||||
if (!newloc_parsed)
|
if (!newloc_parsed)
|
||||||
{
|
{
|
||||||
char *error = url_error (mynewloc, up_error_code);
|
char *error = url_error (mynewloc, up_error_code);
|
||||||
@ -794,7 +795,7 @@ retrieve_url (const char *origurl, char **file, char **newloc,
|
|||||||
if (!(*dt & RETROKF) && iri->utf8_encode)
|
if (!(*dt & RETROKF) && iri->utf8_encode)
|
||||||
{
|
{
|
||||||
iri->utf8_encode = false;
|
iri->utf8_encode = false;
|
||||||
DEBUGP (("[IRI Fallbacking to non-utf8 for %s\n", quote (url)));
|
DEBUGP (("[IRI fallbacking to non-utf8 for %s\n", quote (url)));
|
||||||
goto second_try;
|
goto second_try;
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -907,6 +908,8 @@ retrieve_from_file (const char *file, bool html, int *count)
|
|||||||
|
|
||||||
/* Reset UTF-8 encode status */
|
/* Reset UTF-8 encode status */
|
||||||
iri->utf8_encode = opt.enable_iri;
|
iri->utf8_encode = opt.enable_iri;
|
||||||
|
xfree_null (iri->orig_url);
|
||||||
|
iri->orig_url = NULL;
|
||||||
|
|
||||||
if ((opt.recursive || opt.page_requisites)
|
if ((opt.recursive || opt.page_requisites)
|
||||||
&& (cur_url->url->scheme != SCHEME_FTP || getproxy (cur_url->url)))
|
&& (cur_url->url->scheme != SCHEME_FTP || getproxy (cur_url->url)))
|
||||||
@ -1100,7 +1103,7 @@ url_uses_proxy (const char *url)
|
|||||||
struct iri *i = iri_new();
|
struct iri *i = iri_new();
|
||||||
/* url was given in the command line, so use locale as encoding */
|
/* url was given in the command line, so use locale as encoding */
|
||||||
set_uri_encoding (i, opt.locale, true);
|
set_uri_encoding (i, opt.locale, true);
|
||||||
u= url_parse (url, NULL, i);
|
u= url_parse (url, NULL, i, false);
|
||||||
if (!u)
|
if (!u)
|
||||||
return false;
|
return false;
|
||||||
ret = getproxy (u) != NULL;
|
ret = getproxy (u) != NULL;
|
||||||
|
src/url.c (22 changed lines)
@ -640,7 +640,7 @@ static const char *parse_errors[] = {
|
|||||||
error, and if ERROR is not NULL, also set *ERROR to the appropriate
|
error, and if ERROR is not NULL, also set *ERROR to the appropriate
|
||||||
error code. */
|
error code. */
|
||||||
struct url *
|
struct url *
|
||||||
url_parse (const char *url, int *error, struct iri *iri)
|
url_parse (const char *url, int *error, struct iri *iri, bool percent_encode)
|
||||||
{
|
{
|
||||||
struct url *u;
|
struct url *u;
|
||||||
const char *p;
|
const char *p;
|
||||||
@ -672,13 +672,19 @@ url_parse (const char *url, int *error, struct iri *iri)
|
|||||||
|
|
||||||
if (iri && iri->utf8_encode)
|
if (iri && iri->utf8_encode)
|
||||||
{
|
{
|
||||||
url_unescape ((char *) url);
|
iri->utf8_encode = remote_to_utf8 (iri, iri->orig_url ? iri->orig_url : url, (const char **) &new_url);
|
||||||
iri->utf8_encode = remote_to_utf8 (iri, url, (const char **) &new_url);
|
|
||||||
if (!iri->utf8_encode)
|
if (!iri->utf8_encode)
|
||||||
new_url = NULL;
|
new_url = NULL;
|
||||||
|
else
|
||||||
|
iri->orig_url = xstrdup (url);
|
||||||
}
|
}
|
||||||
|
|
||||||
url_encoded = reencode_escapes (new_url ? new_url : url);
|
/* XXX XXX Could that change introduce (security) bugs ??? XXX XXX*/
|
||||||
|
if (percent_encode)
|
||||||
|
url_encoded = reencode_escapes (new_url ? new_url : url);
|
||||||
|
else
|
||||||
|
url_encoded = new_url ? new_url : url;
|
||||||
|
|
||||||
p = url_encoded;
|
p = url_encoded;
|
||||||
|
|
||||||
if (new_url && url_encoded != new_url)
|
if (new_url && url_encoded != new_url)
|
||||||
@ -1992,12 +1998,12 @@ schemes_are_similar_p (enum url_scheme a, enum url_scheme b)
|
|||||||
|
|
||||||
static int
|
static int
|
||||||
getchar_from_escaped_string (const char *str, char *c)
|
getchar_from_escaped_string (const char *str, char *c)
|
||||||
{
|
{
|
||||||
const char *p = str;
|
const char *p = str;
|
||||||
|
|
||||||
assert (str && *str);
|
assert (str && *str);
|
||||||
assert (c);
|
assert (c);
|
||||||
|
|
||||||
if (p[0] == '%')
|
if (p[0] == '%')
|
||||||
{
|
{
|
||||||
if (!c_isxdigit(p[1]) || !c_isxdigit(p[2]))
|
if (!c_isxdigit(p[1]) || !c_isxdigit(p[2]))
|
||||||
@ -2047,7 +2053,7 @@ are_urls_equal (const char *u1, const char *u2)
|
|||||||
p += pp;
|
p += pp;
|
||||||
q += qq;
|
q += qq;
|
||||||
}
|
}
|
||||||
|
|
||||||
return (*p == 0 && *q == 0 ? true : false);
|
return (*p == 0 && *q == 0 ? true : false);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -2156,7 +2162,7 @@ test_append_uri_pathel()
|
|||||||
} test_array[] = {
|
} test_array[] = {
|
||||||
{ "http://www.yoyodyne.com/path/", "somepage.html", false, "http://www.yoyodyne.com/path/somepage.html" },
|
{ "http://www.yoyodyne.com/path/", "somepage.html", false, "http://www.yoyodyne.com/path/somepage.html" },
|
||||||
};
|
};
|
||||||
|
|
||||||
for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i)
|
for (i = 0; i < sizeof(test_array)/sizeof(test_array[0]); ++i)
|
||||||
{
|
{
|
||||||
struct growable dest;
|
struct growable dest;
|
||||||
|
@ -84,7 +84,7 @@ struct url
|
|||||||
|
|
||||||
char *url_escape (const char *);
|
char *url_escape (const char *);
|
||||||
|
|
||||||
struct url *url_parse (const char *, int *, struct iri *iri);
|
struct url *url_parse (const char *, int *, struct iri *iri, bool percent_encode);
|
||||||
char *url_error (const char *, int);
|
char *url_error (const char *, int);
|
||||||
char *url_full_path (const struct url *);
|
char *url_full_path (const struct url *);
|
||||||
void url_set_dir (struct url *, const char *);
|
void url_set_dir (struct url *, const char *);
|
||||||
|
@ -214,9 +214,9 @@ my %expected_downloaded_files = (
|
|||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
my $the_test = HTTPTest->new (name => "Test-iri",
|
my $the_test = HTTPTest->new (name => "Test-iri",
|
||||||
input => \%urls,
|
input => \%urls,
|
||||||
cmdline => $cmdline,
|
cmdline => $cmdline,
|
||||||
errcode => $expected_error_code,
|
errcode => $expected_error_code,
|
||||||
output => \%expected_downloaded_files);
|
output => \%expected_downloaded_files);
|
||||||
exit $the_test->run();
|
exit $the_test->run();
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user