Merge remote-tracking branch 'origin' into parallel-wget

This commit is contained in:
Giuseppe Scrivano 2013-10-13 17:53:35 +02:00
commit 93ad38686a
10 changed files with 67 additions and 8 deletions

View File

@ -1,3 +1,7 @@
2013-10-06 Tim Ruehsen <tim.ruehsen@gmx.de>
* wget.texi: add/explain quoting of wildcard patterns
2013-09-04 Tim Ruehsen <tim.ruehsen@gmx.de>
* sample.wgetrc: added "secureprotocol" example

View File

@ -2100,6 +2100,8 @@ accept or reject (@pxref{Types of Files}). Note that if
any of the wildcard characters, @samp{*}, @samp{?}, @samp{[} or
@samp{]}, appear in an element of @var{acclist} or @var{rejlist},
it will be treated as a pattern, rather than a suffix.
In this case, you have to enclose the pattern in quotes to prevent
your shell from expanding it, as in @samp{-A "*.mp3"} or @samp{-A '*.mp3'}.
@item --accept-regex @var{urlregex}
@itemx --reject-regex @var{urlregex}
@ -2157,8 +2159,10 @@ dedicated @samp{--page-requisites} option.
Ignore case when matching files and directories. This influences the
behavior of -R, -A, -I, and -X options, as well as globbing
implemented when downloading from FTP sites. For example, with this
option, @samp{-A *.txt} will match @samp{file1.txt}, but also
option, @samp{-A "*.txt"} will match @samp{file1.txt}, but also
@samp{file2.TXT}, @samp{file3.TxT}, and so on.
The quotes in the example are to prevent the shell from expanding the
pattern.
@item -H
@itemx --span-hosts

View File

@ -15,6 +15,11 @@
* utils.c (run_with_timeout): abort when there are more threads.
2013-10-10 Giuseppe Scrivano <gscrivan@redhat.com>
* url.c (url_parse): Try to convert UTF-8 URLs to IDN.
* html-url.c (append_url): Parse URLs specifying an IRI structure.
2013-09-13 Tim Ruehsen <tim.ruehsen@gmx.de>
* recur.c (download_child_p): fix compile error when

View File

@ -284,6 +284,10 @@ append_url (const char *link_uri, int position, int size,
const char *base = ctx->base ? ctx->base : ctx->parent_base;
struct url *url;
struct iri *iri = iri_new ();
set_uri_encoding (iri, opt.locale, true);
iri->utf8_encode = true;
if (!base)
{
DEBUGP (("%s: no base, merge will use \"%s\".\n",
@ -301,7 +305,7 @@ append_url (const char *link_uri, int position, int size,
return NULL;
}
url = url_parse (link_uri, NULL, NULL, false);
url = url_parse (link_uri, NULL, iri, false);
if (!url)
{
DEBUGP (("%s: link \"%s\" doesn't parse.\n",
@ -323,7 +327,7 @@ append_url (const char *link_uri, int position, int size,
quote_n (2, link_uri),
quotearg_n_style (3, escape_quoting_style, complete_uri)));
url = url_parse (complete_uri, NULL, NULL, false);
url = url_parse (complete_uri, NULL, iri, false);
if (!url)
{
DEBUGP (("%s: merged link \"%s\" doesn't parse.\n",
@ -334,6 +338,8 @@ append_url (const char *link_uri, int position, int size,
xfree (complete_uri);
}
iri_free (iri);
DEBUGP (("appending %s to urlpos.\n", quote (url->url)));
newel = xnew0 (struct urlpos);

View File

@ -702,7 +702,10 @@ url_parse (const char *url, int *error, struct iri *iri, bool percent_encode)
if (!iri->utf8_encode)
new_url = NULL;
else
iri->orig_url = xstrdup (url);
{
iri->orig_url = xstrdup (url);
percent_encode = true;
}
}
/* XXX XXX Could that change introduce (security) bugs ??? XXX XXX*/

View File

@ -1,3 +1,15 @@
2013-10-10 Giuseppe Scrivano <gscrivan@redhat.com>
* Test-idn-robots-utf8.px: Remove -H.
* Test-idn-cmd.px: Likewise.
* Test-idn-cmd-utf8.px: Likewise.
Suggested by: Tim Ruehsen <tim.ruehsen@gmx.de>
2013-10-07 Tim Ruehsen <tim.ruehsen@gmx.de>
* Test-idn-robots.px: added punycoded and escaped URLs to follow;
removed -H
2013-08-22 Tim Ruehsen <tim.ruehsen@gmx.de>
* Makefile.am (EXTRA_DIST): Add Test--httpsonly-r.px.

View File

@ -28,7 +28,7 @@ my %urls = (
},
);
my $cmdline = $WgetTest::WGETPATH . " --iri -rH"
my $cmdline = $WgetTest::WGETPATH . " --iri -r"
. " -e http_proxy=localhost:{{port}} --local-encoding=UTF-8 $utf8_hostname";
my $expected_error_code = 0;

View File

@ -28,7 +28,7 @@ my %urls = (
},
);
my $cmdline = $WgetTest::WGETPATH . " --iri -rH"
my $cmdline = $WgetTest::WGETPATH . " --iri -r"
. " -e http_proxy=localhost:{{port}} --local-encoding=EUC-JP $euc_jp_hostname";
my $expected_error_code = 0;

View File

@ -48,7 +48,7 @@ my %urls = (
},
);
my $cmdline = $WgetTest::WGETPATH . " --iri -rH"
my $cmdline = $WgetTest::WGETPATH . " --iri -r"
. " -e http_proxy=localhost:{{port}} --local-encoding=UTF-8"
. " http://$utf8_hostname/";

View File

@ -9,11 +9,14 @@ use HTTPTest;
# " Kon'nichiwa <dot> Japan
my $euc_jp_hostname = "\272\243\306\374\244\317.\306\374\313\334";
my $punycoded_hostname = 'xn--v9ju72g90p.xn--wgv71a';
my $escaped_hostname = "%ba%a3%c6%fc%a4%cf.%c6%fc%cb%dc";
###############################################################################
my $starter_file = <<EOF;
<a href="http://$euc_jp_hostname/foo.txt">The link</a>
<a href="http://$punycoded_hostname/foo2.txt">The second link</a>
<a href="http://$escaped_hostname/foo3.txt">The third link</a>
EOF
my $result_file = <<EOF;
@ -38,6 +41,22 @@ my %urls = (
},
content => $result_file,
},
"http://$punycoded_hostname/foo2.txt" => {
code => "200",
msg => "Uh-huh2",
headers => {
'Content-Type' => 'text/plain',
},
content => $result_file,
},
"http://$punycoded_hostname/foo3.txt" => {
code => "200",
msg => "Uh-huh3",
headers => {
'Content-Type' => 'text/plain',
},
content => $result_file,
},
"http://$punycoded_hostname/robots.txt" => {
code => "200",
msg => "Uh-huh",
@ -48,7 +67,7 @@ my %urls = (
},
);
my $cmdline = $WgetTest::WGETPATH . " --iri -rH"
my $cmdline = $WgetTest::WGETPATH . " --iri -r"
. " -e http_proxy=localhost:{{port}} --local-encoding=EUC-JP"
. " http://$euc_jp_hostname/";
@ -61,6 +80,12 @@ my %expected_downloaded_files = (
"$punycoded_hostname/foo.txt" => {
content => $result_file,
},
"$punycoded_hostname/foo2.txt" => {
content => $result_file,
},
"$punycoded_hostname/foo3.txt" => {
content => $result_file,
},
"$punycoded_hostname/robots.txt" => {
content => '',
},