mirror of
https://github.com/mirror/wget.git
synced 2024-12-28 05:40:08 +08:00
Bugfix: Keep the download progress when alternating metalink:url
* NEWS: Mention the effects of --continue over Metalink * src/metalink.c (retrieve_from_metalink): On download error, resume output_stream with the next mres->url. Keep fully downloaded files started with --continue, otherwise rename/remove the file * testenv/Makefile.am: Add new file * testenv/Test-metalink-xml-continue.py: New file. Metalink/XML continue/keep existing files (HTTP 416) with --continue tests Before this patch, with --continue, existing and/or fully retrieved files which fail the sanity tests were renamed (--keep-badhash), or removed. This patch ensures that --continue doesn't rename/remove existing and/or fully retrieved files (HTTP 416) which fail the sanity tests.
This commit is contained in:
parent
96554861f9
commit
ff444ebc2a
4
NEWS
4
NEWS
@ -9,6 +9,10 @@ Please send GNU Wget bug reports to <bug-wget@gnu.org>.
|
||||
|
||||
* Changes in Wget X.Y.Z
|
||||
|
||||
* When processing a Metalink file, with --continue resume partially
|
||||
downloaded files and keep fully downloaded files even if they fail
|
||||
the verification.
|
||||
|
||||
* When processing a Metalink file, create the parent directories of a
|
||||
"path/file" destination file name:
|
||||
https://tools.ietf.org/html/rfc5854#section-4.1.2.1
|
||||
|
@ -96,12 +96,14 @@ retrieve_from_metalink (const metalink_t* metalink)
|
||||
1 -> verified successfully */
|
||||
char sig_status = 0;
|
||||
|
||||
bool skip_mfile = false;
|
||||
|
||||
output_stream = NULL;
|
||||
|
||||
DEBUGP (("Processing metalink file %s...\n", quote (mfile->name)));
|
||||
|
||||
/* Resources are sorted by priority. */
|
||||
for (mres_ptr = mfile->resources; *mres_ptr; mres_ptr++)
|
||||
for (mres_ptr = mfile->resources; *mres_ptr && !skip_mfile; mres_ptr++)
|
||||
{
|
||||
metalink_resource_t *mres = *mres_ptr;
|
||||
metalink_checksum_t **mchksum_ptr, *mchksum;
|
||||
@ -117,26 +119,30 @@ retrieve_from_metalink (const metalink_t* metalink)
|
||||
continue;
|
||||
}
|
||||
|
||||
retr_err = METALINK_RETR_ERROR;
|
||||
|
||||
/* If output_stream is not NULL, then we have failed on
|
||||
previous resource and are retrying. Thus, rename/remove
|
||||
the file. */
|
||||
if (output_stream)
|
||||
/* The file is fully downloaded, but some problems were
|
||||
encountered (checksum failure?). The loop had been
|
||||
continued to switch to the next url. */
|
||||
if (output_stream && retr_err == RETROK)
|
||||
{
|
||||
/* Do not rename/remove a continued file. Skip it. */
|
||||
if (opt.always_rest)
|
||||
{
|
||||
skip_mfile = true;
|
||||
continue;
|
||||
}
|
||||
|
||||
fclose (output_stream);
|
||||
output_stream = NULL;
|
||||
badhash_or_remove (filename);
|
||||
xfree (filename);
|
||||
}
|
||||
else if (filename)
|
||||
else if (!output_stream && filename)
|
||||
{
|
||||
/* Rename/remove the file downloaded previously before
|
||||
downloading it again. */
|
||||
badhash_or_remove (filename);
|
||||
xfree (filename);
|
||||
}
|
||||
|
||||
retr_err = METALINK_RETR_ERROR;
|
||||
|
||||
/* Parse our resource URL. */
|
||||
iri = iri_new ();
|
||||
set_uri_encoding (iri, opt.locale, true);
|
||||
@ -156,17 +162,29 @@ retrieve_from_metalink (const metalink_t* metalink)
|
||||
/* Avoid recursive Metalink from HTTP headers. */
|
||||
bool _metalink_http = opt.metalink_over_http;
|
||||
|
||||
/* Assure proper local file name regardless of the URL
|
||||
of particular Metalink resource.
|
||||
To do that we create the local file here and put
|
||||
it as output_stream. We restore the original configuration
|
||||
after we are finished with the file. */
|
||||
if (opt.always_rest)
|
||||
/* continue previous download */
|
||||
output_stream = fopen (mfile->name, "ab");
|
||||
/* If output_stream is not NULL, then we have failed on
|
||||
previous resource and are retrying. Thus, continue
|
||||
with the next resource. Do not close output_stream
|
||||
while iterating over the resources, or the download
|
||||
progress will be lost. */
|
||||
if (output_stream)
|
||||
{
|
||||
DEBUGP (("Previous resource failed, continue with next resource.\n"));
|
||||
}
|
||||
else
|
||||
/* create a file with an unique name */
|
||||
output_stream = unique_create (mfile->name, true, &filename);
|
||||
{
|
||||
/* Assure proper local file name regardless of the URL
|
||||
of particular Metalink resource.
|
||||
To do that we create the local file here and put
|
||||
it as output_stream. We restore the original configuration
|
||||
after we are finished with the file. */
|
||||
if (opt.always_rest)
|
||||
/* continue previous download */
|
||||
output_stream = fopen (mfile->name, "ab");
|
||||
else
|
||||
/* create a file with an unique name */
|
||||
output_stream = unique_create (mfile->name, true, &filename);
|
||||
}
|
||||
|
||||
output_stream_regular = true;
|
||||
|
||||
@ -563,7 +581,7 @@ gpg_skip_verification:
|
||||
/* Rename the file if error encountered; remove if option specified.
|
||||
Note: the file has been downloaded using *_loop. Therefore, it
|
||||
is not necessary to keep the file for continuated download. */
|
||||
if ((retr_err != RETROK || opt.delete_after)
|
||||
if (((retr_err != RETROK && !opt.always_rest) || opt.delete_after)
|
||||
&& filename != NULL && file_exists_p (filename))
|
||||
{
|
||||
badhash_or_remove (filename);
|
||||
|
@ -29,6 +29,7 @@
|
||||
if METALINK_IS_ENABLED
|
||||
METALINK_TESTS = Test-metalink-http.py \
|
||||
Test-metalink-xml.py \
|
||||
Test-metalink-xml-continue.py \
|
||||
Test-metalink-xml-relpath.py \
|
||||
Test-metalink-xml-abspath.py
|
||||
else
|
||||
|
185
testenv/Test-metalink-xml-continue.py
Normal file
185
testenv/Test-metalink-xml-continue.py
Normal file
@ -0,0 +1,185 @@
|
||||
#!/usr/bin/env python3
|
||||
from sys import exit
|
||||
from test.http_test import HTTPTest
|
||||
from misc.wget_file import WgetFile
|
||||
import hashlib
|
||||
|
||||
"""
|
||||
This is to test Metalink/XML --continue support in Wget.
|
||||
"""
|
||||
############# File Definitions ###############################################
|
||||
bad = "Ouch!"
|
||||
|
||||
# partial File1 to continue
|
||||
File0 = "Would you like"
|
||||
|
||||
File1 = "Would you like some Tea?"
|
||||
File1_lowPref = "Do not take this"
|
||||
File1_sha256 = hashlib.sha256 (File1.encode ('UTF-8')).hexdigest ()
|
||||
|
||||
File2 = "This is gonna be good"
|
||||
File2_lowPref = "Not this one too"
|
||||
File2_sha256 = hashlib.sha256 (File2.encode ('UTF-8')).hexdigest ()
|
||||
|
||||
File3 = "A little more, please"
|
||||
File3_lowPref = "That's just too much"
|
||||
File3_sha256 = hashlib.sha256 (File3.encode ('UTF-8')).hexdigest ()
|
||||
|
||||
File4 = "Maybe a biscuit?"
|
||||
File4_lowPref = "No, thanks"
|
||||
File4_sha256 = hashlib.sha256 (File4.encode ('UTF-8')).hexdigest ()
|
||||
|
||||
File5 = "More Tea...?"
|
||||
File5_lowPref = "I have to go..."
|
||||
File5_sha256 = hashlib.sha256 (File5.encode ('UTF-8')).hexdigest ()
|
||||
|
||||
MetaXml = \
|
||||
"""<?xml version="1.0" encoding="utf-8"?>
|
||||
<metalink version="3.0" xmlns="http://www.metalinker.org/">
|
||||
<publisher>
|
||||
<name>GNU Wget</name>
|
||||
</publisher>
|
||||
<license>
|
||||
<name>GNU GPL</name>
|
||||
<url>http://www.gnu.org/licenses/gpl.html</url>
|
||||
</license>
|
||||
<identity>Wget Test Files</identity>
|
||||
<version>1.2.3</version>
|
||||
<description>Wget Test Files description</description>
|
||||
<files>
|
||||
<file name="File1">
|
||||
<verification>
|
||||
<hash type="sha256">{{FILE1_HASH}}</hash>
|
||||
</verification>
|
||||
<resources>
|
||||
<url type="http" preference="25">http://{{SRV_HOST}}:{{SRV_PORT}}/File1_lowPref</url>
|
||||
<url type="http" preference="30">http://{{SRV_HOST}}:{{SRV_PORT}}/File1</url>
|
||||
</resources>
|
||||
</file>
|
||||
<file name="File2"> <!-- no good resources on purpose, this file shall be kept -->
|
||||
<verification>
|
||||
<hash type="sha256">{{FILE2_HASH}}</hash>
|
||||
</verification>
|
||||
<resources>
|
||||
<url type="http" preference="35">http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file</url>
|
||||
<url type="http" preference="40">http://{{SRV_HOST}}:{{SRV_PORT}}/404</url>
|
||||
</resources>
|
||||
</file>
|
||||
<file name="File3">
|
||||
<verification>
|
||||
<hash type="sha256">{{FILE3_HASH}}</hash>
|
||||
</verification>
|
||||
<resources>
|
||||
<url type="http" preference="25">http://{{SRV_HOST}}:{{SRV_PORT}}/File3_lowPref</url>
|
||||
<url type="http" preference="30">http://{{SRV_HOST}}:{{SRV_PORT}}/File3</url>
|
||||
</resources>
|
||||
</file>
|
||||
<file name="File4"> <!-- no good resources on purpose, this file shall be kept -->
|
||||
<verification>
|
||||
<hash type="sha256">{{FILE4_HASH}}</hash>
|
||||
</verification>
|
||||
<resources>
|
||||
<url type="http" preference="35">http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file</url>
|
||||
</resources>
|
||||
</file>
|
||||
<file name="File5">
|
||||
<verification>
|
||||
<hash type="sha256">{{FILE5_HASH}}</hash>
|
||||
</verification>
|
||||
<resources>
|
||||
<url type="http" preference="25">http://{{SRV_HOST}}:{{SRV_PORT}}/File5_lowPref</url>
|
||||
<url type="http" preference="30">http://{{SRV_HOST}}:{{SRV_PORT}}/File5</url>
|
||||
</resources>
|
||||
</file>
|
||||
</files>
|
||||
</metalink>
|
||||
"""
|
||||
|
||||
wrong_file = WgetFile ("wrong_file", bad)
|
||||
|
||||
# partial File1_down to continue
|
||||
File0_part = WgetFile ("File1", File0)
|
||||
|
||||
File1_orig = WgetFile ("File1", File1)
|
||||
File1_down = WgetFile ("File1", File1)
|
||||
File1_nono = WgetFile ("File1_lowPref", File1_lowPref)
|
||||
|
||||
# no good resources on purpose, this file shall be kept
|
||||
File2_ouch = WgetFile ("File2", bad)
|
||||
|
||||
File3_orig = WgetFile ("File3", File3)
|
||||
File3_down = WgetFile ("File3", File3)
|
||||
File3_nono = WgetFile ("File3_lowPref", File3_lowPref)
|
||||
|
||||
# no good resources on purpose, this file shall be kept
|
||||
File4_ouch = WgetFile ("File4", bad)
|
||||
|
||||
File5_orig = WgetFile ("File5", File5)
|
||||
File5_down = WgetFile ("File5", File5)
|
||||
File5_nono = WgetFile ("File5_lowPref", File5_lowPref)
|
||||
|
||||
MetaFile = WgetFile ("test.metalink", MetaXml)
|
||||
|
||||
WGET_OPTIONS = "--continue --input-metalink test.metalink"
|
||||
WGET_URLS = [[]]
|
||||
|
||||
Files = [[
|
||||
wrong_file,
|
||||
File1_orig, File1_nono,
|
||||
File3_orig, File3_nono,
|
||||
File5_orig, File5_nono
|
||||
]]
|
||||
Existing_Files = [
|
||||
File0_part, # partial File1_down to continue
|
||||
File2_ouch, # wrong but fully downloaded file
|
||||
File3_down, # right and fully downloaded file
|
||||
File4_ouch, # wrong but fully downloaded file
|
||||
MetaFile
|
||||
]
|
||||
|
||||
ExpectedReturnCode = 1
|
||||
ExpectedDownloadedFiles = [
|
||||
File1_down, # continued file from File0_part
|
||||
File2_ouch, # wrong but fully downloaded file
|
||||
File3_down, # right and fully downloaded file
|
||||
File4_ouch, # wrong but fully downloaded file
|
||||
File5_down, # newly fully donwloaded file
|
||||
MetaFile
|
||||
]
|
||||
|
||||
################ Pre and Post Test Hooks #####################################
|
||||
pre_test = {
|
||||
"ServerFiles" : Files,
|
||||
"LocalFiles" : Existing_Files
|
||||
}
|
||||
test_options = {
|
||||
"WgetCommands" : WGET_OPTIONS,
|
||||
"Urls" : WGET_URLS
|
||||
}
|
||||
post_test = {
|
||||
"ExpectedFiles" : ExpectedDownloadedFiles,
|
||||
"ExpectedRetcode" : ExpectedReturnCode
|
||||
}
|
||||
|
||||
http_test = HTTPTest (
|
||||
pre_hook=pre_test,
|
||||
test_params=test_options,
|
||||
post_hook=post_test,
|
||||
)
|
||||
|
||||
http_test.server_setup()
|
||||
### Get and use dynamic server sockname
|
||||
srv_host, srv_port = http_test.servers[0].server_inst.socket.getsockname ()
|
||||
|
||||
MetaXml = MetaXml.replace('{{FILE1_HASH}}', File1_sha256)
|
||||
MetaXml = MetaXml.replace('{{FILE2_HASH}}', File2_sha256)
|
||||
MetaXml = MetaXml.replace('{{FILE3_HASH}}', File3_sha256)
|
||||
MetaXml = MetaXml.replace('{{FILE4_HASH}}', File4_sha256)
|
||||
MetaXml = MetaXml.replace('{{FILE5_HASH}}', File5_sha256)
|
||||
MetaXml = MetaXml.replace('{{SRV_HOST}}', srv_host)
|
||||
MetaXml = MetaXml.replace('{{SRV_PORT}}', str (srv_port))
|
||||
MetaFile.content = MetaXml
|
||||
|
||||
err = http_test.begin ()
|
||||
|
||||
exit (err)
|
Loading…
Reference in New Issue
Block a user