mirror of
https://github.com/mirror/wget.git
synced 2025-03-22 15:50:13 +08:00
Merge remote-tracking branch 'origin' into parallel-wget
This commit is contained in:
commit
93ad38686a
@ -1,3 +1,7 @@
|
|||||||
|
2013-10-06 Tim Ruehsen <tim.ruehsen@gmx.de>
|
||||||
|
|
||||||
|
* wget.texi: add/explain quoting of wildcard patterns
|
||||||
|
|
||||||
2013-09-04 Tim Ruehsen <tim.ruehsen@gmx.de>
|
2013-09-04 Tim Ruehsen <tim.ruehsen@gmx.de>
|
||||||
|
|
||||||
* sample.wgetrc: added "secureprotocol" example
|
* sample.wgetrc: added "secureprotocol" example
|
||||||
|
@ -2100,6 +2100,8 @@ accept or reject (@pxref{Types of Files}). Note that if
|
|||||||
any of the wildcard characters, @samp{*}, @samp{?}, @samp{[} or
|
any of the wildcard characters, @samp{*}, @samp{?}, @samp{[} or
|
||||||
@samp{]}, appear in an element of @var{acclist} or @var{rejlist},
|
@samp{]}, appear in an element of @var{acclist} or @var{rejlist},
|
||||||
it will be treated as a pattern, rather than a suffix.
|
it will be treated as a pattern, rather than a suffix.
|
||||||
|
In this case, you have to enclose the pattern in quotes to prevent
|
||||||
|
your shell from expanding it, as in @samp{-A "*.mp3"} or @samp{-A '*.mp3'}.
|
||||||
|
|
||||||
@item --accept-regex @var{urlregex}
|
@item --accept-regex @var{urlregex}
|
||||||
@itemx --reject-regex @var{urlregex}
|
@itemx --reject-regex @var{urlregex}
|
||||||
@ -2157,8 +2159,10 @@ dedicated @samp{--page-requisites} option.
|
|||||||
Ignore case when matching files and directories. This influences the
|
Ignore case when matching files and directories. This influences the
|
||||||
behavior of -R, -A, -I, and -X options, as well as globbing
|
behavior of -R, -A, -I, and -X options, as well as globbing
|
||||||
implemented when downloading from FTP sites. For example, with this
|
implemented when downloading from FTP sites. For example, with this
|
||||||
option, @samp{-A *.txt} will match @samp{file1.txt}, but also
|
option, @samp{-A "*.txt"} will match @samp{file1.txt}, but also
|
||||||
@samp{file2.TXT}, @samp{file3.TxT}, and so on.
|
@samp{file2.TXT}, @samp{file3.TxT}, and so on.
|
||||||
|
The quotes in the example are to prevent the shell from expanding the
|
||||||
|
pattern.
|
||||||
|
|
||||||
@item -H
|
@item -H
|
||||||
@itemx --span-hosts
|
@itemx --span-hosts
|
||||||
|
@ -15,6 +15,11 @@
|
|||||||
|
|
||||||
* utils.c (run_with_timeout): abort when there are more threads.
|
* utils.c (run_with_timeout): abort when there are more threads.
|
||||||
|
|
||||||
|
2013-10-10 Giuseppe Scrivano <gscrivan@redhat.com>
|
||||||
|
|
||||||
|
* url.c (url_parse): Try to convert UTF-8 URLs to IDN.
|
||||||
|
* html-url.c (append_url): Parse URLs specifying an IRI structure.
|
||||||
|
|
||||||
2013-09-13 Tim Ruehsen <tim.ruehsen@gmx.de>
|
2013-09-13 Tim Ruehsen <tim.ruehsen@gmx.de>
|
||||||
|
|
||||||
* recur.c (download_child_p): fix compile error when
|
* recur.c (download_child_p): fix compile error when
|
||||||
|
@ -284,6 +284,10 @@ append_url (const char *link_uri, int position, int size,
|
|||||||
const char *base = ctx->base ? ctx->base : ctx->parent_base;
|
const char *base = ctx->base ? ctx->base : ctx->parent_base;
|
||||||
struct url *url;
|
struct url *url;
|
||||||
|
|
||||||
|
struct iri *iri = iri_new ();
|
||||||
|
set_uri_encoding (iri, opt.locale, true);
|
||||||
|
iri->utf8_encode = true;
|
||||||
|
|
||||||
if (!base)
|
if (!base)
|
||||||
{
|
{
|
||||||
DEBUGP (("%s: no base, merge will use \"%s\".\n",
|
DEBUGP (("%s: no base, merge will use \"%s\".\n",
|
||||||
@ -301,7 +305,7 @@ append_url (const char *link_uri, int position, int size,
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
url = url_parse (link_uri, NULL, NULL, false);
|
url = url_parse (link_uri, NULL, iri, false);
|
||||||
if (!url)
|
if (!url)
|
||||||
{
|
{
|
||||||
DEBUGP (("%s: link \"%s\" doesn't parse.\n",
|
DEBUGP (("%s: link \"%s\" doesn't parse.\n",
|
||||||
@ -323,7 +327,7 @@ append_url (const char *link_uri, int position, int size,
|
|||||||
quote_n (2, link_uri),
|
quote_n (2, link_uri),
|
||||||
quotearg_n_style (3, escape_quoting_style, complete_uri)));
|
quotearg_n_style (3, escape_quoting_style, complete_uri)));
|
||||||
|
|
||||||
url = url_parse (complete_uri, NULL, NULL, false);
|
url = url_parse (complete_uri, NULL, iri, false);
|
||||||
if (!url)
|
if (!url)
|
||||||
{
|
{
|
||||||
DEBUGP (("%s: merged link \"%s\" doesn't parse.\n",
|
DEBUGP (("%s: merged link \"%s\" doesn't parse.\n",
|
||||||
@ -334,6 +338,8 @@ append_url (const char *link_uri, int position, int size,
|
|||||||
xfree (complete_uri);
|
xfree (complete_uri);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
iri_free (iri);
|
||||||
|
|
||||||
DEBUGP (("appending %s to urlpos.\n", quote (url->url)));
|
DEBUGP (("appending %s to urlpos.\n", quote (url->url)));
|
||||||
|
|
||||||
newel = xnew0 (struct urlpos);
|
newel = xnew0 (struct urlpos);
|
||||||
|
@ -702,7 +702,10 @@ url_parse (const char *url, int *error, struct iri *iri, bool percent_encode)
|
|||||||
if (!iri->utf8_encode)
|
if (!iri->utf8_encode)
|
||||||
new_url = NULL;
|
new_url = NULL;
|
||||||
else
|
else
|
||||||
iri->orig_url = xstrdup (url);
|
{
|
||||||
|
iri->orig_url = xstrdup (url);
|
||||||
|
percent_encode = true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* XXX XXX Could that change introduce (security) bugs ??? XXX XXX*/
|
/* XXX XXX Could that change introduce (security) bugs ??? XXX XXX*/
|
||||||
|
@ -1,3 +1,15 @@
|
|||||||
|
2013-10-10 Giuseppe Scrivano <gscrivan@redhat.com>
|
||||||
|
|
||||||
|
* Test-idn-robots-utf8.px: Remove -H.
|
||||||
|
* Test-idn-cmd.px: Likewise.
|
||||||
|
* Test-idn-cmd-utf8.px: Likewise.
|
||||||
|
Suggested by: Tim Ruehsen <tim.ruehsen@gmx.de>
|
||||||
|
|
||||||
|
2013-10-07 Tim Ruehsen <tim.ruehsen@gmx.de>
|
||||||
|
|
||||||
|
* Test-idn-robots.px: added punycoded and escaped URLs to follow
|
||||||
|
removed -H
|
||||||
|
|
||||||
2013-08-22 Tim Ruehsen <tim.ruehsen@gmx.de>
|
2013-08-22 Tim Ruehsen <tim.ruehsen@gmx.de>
|
||||||
|
|
||||||
* Makefile.am (EXTRA_DIST): Add Test--httpsonly-r.px.
|
* Makefile.am (EXTRA_DIST): Add Test--httpsonly-r.px.
|
||||||
|
@ -28,7 +28,7 @@ my %urls = (
|
|||||||
},
|
},
|
||||||
);
|
);
|
||||||
|
|
||||||
my $cmdline = $WgetTest::WGETPATH . " --iri -rH"
|
my $cmdline = $WgetTest::WGETPATH . " --iri -r"
|
||||||
. " -e http_proxy=localhost:{{port}} --local-encoding=UTF-8 $utf8_hostname";
|
. " -e http_proxy=localhost:{{port}} --local-encoding=UTF-8 $utf8_hostname";
|
||||||
|
|
||||||
my $expected_error_code = 0;
|
my $expected_error_code = 0;
|
||||||
|
@ -28,7 +28,7 @@ my %urls = (
|
|||||||
},
|
},
|
||||||
);
|
);
|
||||||
|
|
||||||
my $cmdline = $WgetTest::WGETPATH . " --iri -rH"
|
my $cmdline = $WgetTest::WGETPATH . " --iri -r"
|
||||||
. " -e http_proxy=localhost:{{port}} --local-encoding=EUC-JP $euc_jp_hostname";
|
. " -e http_proxy=localhost:{{port}} --local-encoding=EUC-JP $euc_jp_hostname";
|
||||||
|
|
||||||
my $expected_error_code = 0;
|
my $expected_error_code = 0;
|
||||||
|
@ -48,7 +48,7 @@ my %urls = (
|
|||||||
},
|
},
|
||||||
);
|
);
|
||||||
|
|
||||||
my $cmdline = $WgetTest::WGETPATH . " --iri -rH"
|
my $cmdline = $WgetTest::WGETPATH . " --iri -r"
|
||||||
. " -e http_proxy=localhost:{{port}} --local-encoding=UTF-8"
|
. " -e http_proxy=localhost:{{port}} --local-encoding=UTF-8"
|
||||||
. " http://$utf8_hostname/";
|
. " http://$utf8_hostname/";
|
||||||
|
|
||||||
|
@ -9,11 +9,14 @@ use HTTPTest;
|
|||||||
# " Kon'nichiwa <dot> Japan
|
# " Kon'nichiwa <dot> Japan
|
||||||
my $euc_jp_hostname = "\272\243\306\374\244\317.\306\374\313\334";
|
my $euc_jp_hostname = "\272\243\306\374\244\317.\306\374\313\334";
|
||||||
my $punycoded_hostname = 'xn--v9ju72g90p.xn--wgv71a';
|
my $punycoded_hostname = 'xn--v9ju72g90p.xn--wgv71a';
|
||||||
|
my $escaped_hostname = "%ba%a3%c6%fc%a4%cf.%c6%fc%cb%dc";
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
|
||||||
my $starter_file = <<EOF;
|
my $starter_file = <<EOF;
|
||||||
<a href="http://$euc_jp_hostname/foo.txt">The link</a>
|
<a href="http://$euc_jp_hostname/foo.txt">The link</a>
|
||||||
|
<a href="http://$punycoded_hostname/foo2.txt">The second link</a>
|
||||||
|
<a href="http://$escaped_hostname/foo3.txt">The third link</a>
|
||||||
EOF
|
EOF
|
||||||
|
|
||||||
my $result_file = <<EOF;
|
my $result_file = <<EOF;
|
||||||
@ -38,6 +41,22 @@ my %urls = (
|
|||||||
},
|
},
|
||||||
content => $result_file,
|
content => $result_file,
|
||||||
},
|
},
|
||||||
|
"http://$punycoded_hostname/foo2.txt" => {
|
||||||
|
code => "200",
|
||||||
|
msg => "Uh-huh2",
|
||||||
|
headers => {
|
||||||
|
'Content-Type' => 'text/plain',
|
||||||
|
},
|
||||||
|
content => $result_file,
|
||||||
|
},
|
||||||
|
"http://$punycoded_hostname/foo3.txt" => {
|
||||||
|
code => "200",
|
||||||
|
msg => "Uh-huh3",
|
||||||
|
headers => {
|
||||||
|
'Content-Type' => 'text/plain',
|
||||||
|
},
|
||||||
|
content => $result_file,
|
||||||
|
},
|
||||||
"http://$punycoded_hostname/robots.txt" => {
|
"http://$punycoded_hostname/robots.txt" => {
|
||||||
code => "200",
|
code => "200",
|
||||||
msg => "Uh-huh",
|
msg => "Uh-huh",
|
||||||
@ -48,7 +67,7 @@ my %urls = (
|
|||||||
},
|
},
|
||||||
);
|
);
|
||||||
|
|
||||||
my $cmdline = $WgetTest::WGETPATH . " --iri -rH"
|
my $cmdline = $WgetTest::WGETPATH . " --iri -r"
|
||||||
. " -e http_proxy=localhost:{{port}} --local-encoding=EUC-JP"
|
. " -e http_proxy=localhost:{{port}} --local-encoding=EUC-JP"
|
||||||
. " http://$euc_jp_hostname/";
|
. " http://$euc_jp_hostname/";
|
||||||
|
|
||||||
@ -61,6 +80,12 @@ my %expected_downloaded_files = (
|
|||||||
"$punycoded_hostname/foo.txt" => {
|
"$punycoded_hostname/foo.txt" => {
|
||||||
content => $result_file,
|
content => $result_file,
|
||||||
},
|
},
|
||||||
|
"$punycoded_hostname/foo2.txt" => {
|
||||||
|
content => $result_file,
|
||||||
|
},
|
||||||
|
"$punycoded_hostname/foo3.txt" => {
|
||||||
|
content => $result_file,
|
||||||
|
},
|
||||||
"$punycoded_hostname/robots.txt" => {
|
"$punycoded_hostname/robots.txt" => {
|
||||||
content => '',
|
content => '',
|
||||||
},
|
},
|
||||||
|
Loading…
Reference in New Issue
Block a user