mirror of
https://github.com/mirror/wget.git
synced 2025-03-14 11:50:18 +08:00
Merge remote-tracking branch 'origin' into parallel-wget
This commit is contained in:
commit
93ad38686a
@ -1,3 +1,7 @@
|
||||
2013-10-06 Tim Ruehsen <tim.ruehsen@gmx.de>
|
||||
|
||||
* wget.texi: add/explain quoting of wildcard patterns
|
||||
|
||||
2013-09-04 Tim Ruehsen <tim.ruehsen@gmx.de>
|
||||
|
||||
* sample.wgetrc: added "secureprotocol" example
|
||||
|
@ -2100,6 +2100,8 @@ accept or reject (@pxref{Types of Files}). Note that if
|
||||
any of the wildcard characters, @samp{*}, @samp{?}, @samp{[} or
|
||||
@samp{]}, appear in an element of @var{acclist} or @var{rejlist},
|
||||
it will be treated as a pattern, rather than a suffix.
|
||||
In this case, you have to enclose the pattern in quotes to prevent
|
||||
your shell from expanding it, as in @samp{-A "*.mp3"} or @samp{-A '*.mp3'}.
|
||||
|
||||
@item --accept-regex @var{urlregex}
|
||||
@itemx --reject-regex @var{urlregex}
|
||||
@ -2157,8 +2159,10 @@ dedicated @samp{--page-requisites} option.
|
||||
Ignore case when matching files and directories. This influences the
|
||||
behavior of -R, -A, -I, and -X options, as well as globbing
|
||||
implemented when downloading from FTP sites. For example, with this
|
||||
option, @samp{-A *.txt} will match @samp{file1.txt}, but also
|
||||
option, @samp{-A "*.txt"} will match @samp{file1.txt}, but also
|
||||
@samp{file2.TXT}, @samp{file3.TxT}, and so on.
|
||||
The quotes in the example are to prevent the shell from expanding the
|
||||
pattern.
|
||||
|
||||
@item -H
|
||||
@itemx --span-hosts
|
||||
|
@ -15,6 +15,11 @@
|
||||
|
||||
* utils.c (run_with_timeout): abort when there are more threads.
|
||||
|
||||
2013-10-10 Giuseppe Scrivano <gscrivan@redhat.com>
|
||||
|
||||
* url.c (url_parse): Try to convert UTF-8 URLs to IDN.
|
||||
* html-url.c (append_url): Parse URLs specifying an IRI structure.
|
||||
|
||||
2013-09-13 Tim Ruehsen <tim.ruehsen@gmx.de>
|
||||
|
||||
* recur.c (download_child_p): fix compile error when
|
||||
|
@ -284,6 +284,10 @@ append_url (const char *link_uri, int position, int size,
|
||||
const char *base = ctx->base ? ctx->base : ctx->parent_base;
|
||||
struct url *url;
|
||||
|
||||
struct iri *iri = iri_new ();
|
||||
set_uri_encoding (iri, opt.locale, true);
|
||||
iri->utf8_encode = true;
|
||||
|
||||
if (!base)
|
||||
{
|
||||
DEBUGP (("%s: no base, merge will use \"%s\".\n",
|
||||
@ -301,7 +305,7 @@ append_url (const char *link_uri, int position, int size,
|
||||
return NULL;
|
||||
}
|
||||
|
||||
url = url_parse (link_uri, NULL, NULL, false);
|
||||
url = url_parse (link_uri, NULL, iri, false);
|
||||
if (!url)
|
||||
{
|
||||
DEBUGP (("%s: link \"%s\" doesn't parse.\n",
|
||||
@ -323,7 +327,7 @@ append_url (const char *link_uri, int position, int size,
|
||||
quote_n (2, link_uri),
|
||||
quotearg_n_style (3, escape_quoting_style, complete_uri)));
|
||||
|
||||
url = url_parse (complete_uri, NULL, NULL, false);
|
||||
url = url_parse (complete_uri, NULL, iri, false);
|
||||
if (!url)
|
||||
{
|
||||
DEBUGP (("%s: merged link \"%s\" doesn't parse.\n",
|
||||
@ -334,6 +338,8 @@ append_url (const char *link_uri, int position, int size,
|
||||
xfree (complete_uri);
|
||||
}
|
||||
|
||||
iri_free (iri);
|
||||
|
||||
DEBUGP (("appending %s to urlpos.\n", quote (url->url)));
|
||||
|
||||
newel = xnew0 (struct urlpos);
|
||||
|
@ -702,7 +702,10 @@ url_parse (const char *url, int *error, struct iri *iri, bool percent_encode)
|
||||
if (!iri->utf8_encode)
|
||||
new_url = NULL;
|
||||
else
|
||||
iri->orig_url = xstrdup (url);
|
||||
{
|
||||
iri->orig_url = xstrdup (url);
|
||||
percent_encode = true;
|
||||
}
|
||||
}
|
||||
|
||||
/* XXX XXX Could that change introduce (security) bugs ??? XXX XXX*/
|
||||
|
@ -1,3 +1,15 @@
|
||||
2013-10-10 Giuseppe Scrivano <gscrivan@redhat.com>
|
||||
|
||||
* Test-idn-robots-utf8.px: Remove -H.
|
||||
* Test-idn-cmd.px: Likewise.
|
||||
* Test-idn-cmd-utf8.px: Likewise.
|
||||
Suggested by: Tim Ruehsen <tim.ruehsen@gmx.de>
|
||||
|
||||
2013-10-07 Tim Ruehsen <tim.ruehsen@gmx.de>
|
||||
|
||||
* Test-idn-robots.px: added punycoded and escaped URLs to follow
|
||||
removed -H
|
||||
|
||||
2013-08-22 Tim Ruehsen <tim.ruehsen@gmx.de>
|
||||
|
||||
* Makefile.am (EXTRA_DIST): Add Test--httpsonly-r.px.
|
||||
|
@ -28,7 +28,7 @@ my %urls = (
|
||||
},
|
||||
);
|
||||
|
||||
my $cmdline = $WgetTest::WGETPATH . " --iri -rH"
|
||||
my $cmdline = $WgetTest::WGETPATH . " --iri -r"
|
||||
. " -e http_proxy=localhost:{{port}} --local-encoding=UTF-8 $utf8_hostname";
|
||||
|
||||
my $expected_error_code = 0;
|
||||
|
@ -28,7 +28,7 @@ my %urls = (
|
||||
},
|
||||
);
|
||||
|
||||
my $cmdline = $WgetTest::WGETPATH . " --iri -rH"
|
||||
my $cmdline = $WgetTest::WGETPATH . " --iri -r"
|
||||
. " -e http_proxy=localhost:{{port}} --local-encoding=EUC-JP $euc_jp_hostname";
|
||||
|
||||
my $expected_error_code = 0;
|
||||
|
@ -48,7 +48,7 @@ my %urls = (
|
||||
},
|
||||
);
|
||||
|
||||
my $cmdline = $WgetTest::WGETPATH . " --iri -rH"
|
||||
my $cmdline = $WgetTest::WGETPATH . " --iri -r"
|
||||
. " -e http_proxy=localhost:{{port}} --local-encoding=UTF-8"
|
||||
. " http://$utf8_hostname/";
|
||||
|
||||
|
@ -9,11 +9,14 @@ use HTTPTest;
|
||||
# " Kon'nichiwa <dot> Japan
|
||||
my $euc_jp_hostname = "\272\243\306\374\244\317.\306\374\313\334";
|
||||
my $punycoded_hostname = 'xn--v9ju72g90p.xn--wgv71a';
|
||||
my $escaped_hostname = "%ba%a3%c6%fc%a4%cf.%c6%fc%cb%dc";
|
||||
|
||||
###############################################################################
|
||||
|
||||
my $starter_file = <<EOF;
|
||||
<a href="http://$euc_jp_hostname/foo.txt">The link</a>
|
||||
<a href="http://$punycoded_hostname/foo2.txt">The second link</a>
|
||||
<a href="http://$escaped_hostname/foo3.txt">The third link</a>
|
||||
EOF
|
||||
|
||||
my $result_file = <<EOF;
|
||||
@ -38,6 +41,22 @@ my %urls = (
|
||||
},
|
||||
content => $result_file,
|
||||
},
|
||||
"http://$punycoded_hostname/foo2.txt" => {
|
||||
code => "200",
|
||||
msg => "Uh-huh2",
|
||||
headers => {
|
||||
'Content-Type' => 'text/plain',
|
||||
},
|
||||
content => $result_file,
|
||||
},
|
||||
"http://$punycoded_hostname/foo3.txt" => {
|
||||
code => "200",
|
||||
msg => "Uh-huh3",
|
||||
headers => {
|
||||
'Content-Type' => 'text/plain',
|
||||
},
|
||||
content => $result_file,
|
||||
},
|
||||
"http://$punycoded_hostname/robots.txt" => {
|
||||
code => "200",
|
||||
msg => "Uh-huh",
|
||||
@ -48,7 +67,7 @@ my %urls = (
|
||||
},
|
||||
);
|
||||
|
||||
my $cmdline = $WgetTest::WGETPATH . " --iri -rH"
|
||||
my $cmdline = $WgetTest::WGETPATH . " --iri -r"
|
||||
. " -e http_proxy=localhost:{{port}} --local-encoding=EUC-JP"
|
||||
. " http://$euc_jp_hostname/";
|
||||
|
||||
@ -61,6 +80,12 @@ my %expected_downloaded_files = (
|
||||
"$punycoded_hostname/foo.txt" => {
|
||||
content => $result_file,
|
||||
},
|
||||
"$punycoded_hostname/foo2.txt" => {
|
||||
content => $result_file,
|
||||
},
|
||||
"$punycoded_hostname/foo3.txt" => {
|
||||
content => $result_file,
|
||||
},
|
||||
"$punycoded_hostname/robots.txt" => {
|
||||
content => '',
|
||||
},
|
||||
|
Loading…
Reference in New Issue
Block a user