From ff444ebc2adf3bb76d4827e7133567e2a037d6f4 Mon Sep 17 00:00:00 2001 From: Matthew White Date: Thu, 4 Aug 2016 11:35:42 +0200 Subject: [PATCH] Bugfix: Keep the download progress when alternating metalink:url * NEWS: Mention the effects of --continue over Metalink * src/metalink.c (retrieve_from_metalink): On download error, resume output_stream with the next mres->url. Keep fully downloaded files started with --continue, otherwise rename/remove the file * testenv/Makefile.am: Add new file * testenv/Test-metalink-xml-continue.py: New file. Metalink/XML continue/keep existing files (HTTP 416) with --continue tests Before this patch, with --continue, existing and/or fully retrieved files which fail the sanity tests were renamed (--keep-badhash), or removed. This patch ensures that --continue doesn't rename/remove existing and/or fully retrieved files (HTTP 416) which fail the sanity tests. --- NEWS | 4 + src/metalink.c | 62 ++++++--- testenv/Makefile.am | 1 + testenv/Test-metalink-xml-continue.py | 185 ++++++++++++++++++++++++++ 4 files changed, 230 insertions(+), 22 deletions(-) create mode 100644 testenv/Test-metalink-xml-continue.py diff --git a/NEWS b/NEWS index 02994184..60112506 100644 --- a/NEWS +++ b/NEWS @@ -9,6 +9,10 @@ Please send GNU Wget bug reports to . * Changes in Wget X.Y.Z +* When processing a Metalink file, with --continue resume partially + downloaded files and keep fully downloaded files even if they fail + the verification. + * When processing a Metalink file, create the parent directories of a "path/file" destination file name: https://tools.ietf.org/html/rfc5854#section-4.1.2.1 diff --git a/src/metalink.c b/src/metalink.c index 8294a7e9..03a0bb13 100644 --- a/src/metalink.c +++ b/src/metalink.c @@ -96,12 +96,14 @@ retrieve_from_metalink (const metalink_t* metalink) 1 -> verified successfully */ char sig_status = 0; + bool skip_mfile = false; + output_stream = NULL; DEBUGP (("Processing metalink file %s...\n", quote (mfile->name))); /* Resources are sorted by priority. */ - for (mres_ptr = mfile->resources; *mres_ptr; mres_ptr++) + for (mres_ptr = mfile->resources; *mres_ptr && !skip_mfile; mres_ptr++) { metalink_resource_t *mres = *mres_ptr; metalink_checksum_t **mchksum_ptr, *mchksum; @@ -117,26 +119,30 @@ retrieve_from_metalink (const metalink_t* metalink) continue; } - retr_err = METALINK_RETR_ERROR; - - /* If output_stream is not NULL, then we have failed on - previous resource and are retrying. Thus, rename/remove - the file. */ - if (output_stream) + /* The file is fully downloaded, but some problems were + encountered (checksum failure?). The loop had been + continued to switch to the next url. */ + if (output_stream && retr_err == RETROK) { + /* Do not rename/remove a continued file. Skip it. */ + if (opt.always_rest) + { + skip_mfile = true; + continue; + } + fclose (output_stream); output_stream = NULL; badhash_or_remove (filename); xfree (filename); } - else if (filename) + else if (!output_stream && filename) { - /* Rename/remove the file downloaded previously before - downloading it again. */ - badhash_or_remove (filename); xfree (filename); } + retr_err = METALINK_RETR_ERROR; + /* Parse our resource URL. */ iri = iri_new (); set_uri_encoding (iri, opt.locale, true); @@ -156,17 +162,29 @@ retrieve_from_metalink (const metalink_t* metalink) /* Avoid recursive Metalink from HTTP headers. */ bool _metalink_http = opt.metalink_over_http; - /* Assure proper local file name regardless of the URL - of particular Metalink resource. - To do that we create the local file here and put - it as output_stream. We restore the original configuration - after we are finished with the file. */ - if (opt.always_rest) - /* continue previous download */ - output_stream = fopen (mfile->name, "ab"); + /* If output_stream is not NULL, then we have failed on + previous resource and are retrying. Thus, continue + with the next resource. Do not close output_stream + while iterating over the resources, or the download + progress will be lost. */ + if (output_stream) + { + DEBUGP (("Previous resource failed, continue with next resource.\n")); + } else - /* create a file with an unique name */ - output_stream = unique_create (mfile->name, true, &filename); + { + /* Assure proper local file name regardless of the URL + of particular Metalink resource. + To do that we create the local file here and put + it as output_stream. We restore the original configuration + after we are finished with the file. */ + if (opt.always_rest) + /* continue previous download */ + output_stream = fopen (mfile->name, "ab"); + else + /* create a file with an unique name */ + output_stream = unique_create (mfile->name, true, &filename); + } output_stream_regular = true; @@ -563,7 +581,7 @@ gpg_skip_verification: /* Rename the file if error encountered; remove if option specified. Note: the file has been downloaded using *_loop. Therefore, it is not necessary to keep the file for continuated download. */ - if ((retr_err != RETROK || opt.delete_after) + if (((retr_err != RETROK && !opt.always_rest) || opt.delete_after) && filename != NULL && file_exists_p (filename)) { badhash_or_remove (filename); diff --git a/testenv/Makefile.am b/testenv/Makefile.am index b68ef8f2..94be0236 100644 --- a/testenv/Makefile.am +++ b/testenv/Makefile.am @@ -29,6 +29,7 @@ if METALINK_IS_ENABLED METALINK_TESTS = Test-metalink-http.py \ Test-metalink-xml.py \ + Test-metalink-xml-continue.py \ Test-metalink-xml-relpath.py \ Test-metalink-xml-abspath.py else diff --git a/testenv/Test-metalink-xml-continue.py b/testenv/Test-metalink-xml-continue.py new file mode 100644 index 00000000..8e3c40ae --- /dev/null +++ b/testenv/Test-metalink-xml-continue.py @@ -0,0 +1,185 @@ +#!/usr/bin/env python3 +from sys import exit +from test.http_test import HTTPTest +from misc.wget_file import WgetFile +import hashlib + +""" + This is to test Metalink/XML --continue support in Wget. +""" +############# File Definitions ############################################### +bad = "Ouch!" + +# partial File1 to continue +File0 = "Would you like" + +File1 = "Would you like some Tea?" +File1_lowPref = "Do not take this" +File1_sha256 = hashlib.sha256 (File1.encode ('UTF-8')).hexdigest () + +File2 = "This is gonna be good" +File2_lowPref = "Not this one too" +File2_sha256 = hashlib.sha256 (File2.encode ('UTF-8')).hexdigest () + +File3 = "A little more, please" +File3_lowPref = "That's just too much" +File3_sha256 = hashlib.sha256 (File3.encode ('UTF-8')).hexdigest () + +File4 = "Maybe a biscuit?" +File4_lowPref = "No, thanks" +File4_sha256 = hashlib.sha256 (File4.encode ('UTF-8')).hexdigest () + +File5 = "More Tea...?" +File5_lowPref = "I have to go..." +File5_sha256 = hashlib.sha256 (File5.encode ('UTF-8')).hexdigest () + +MetaXml = \ +""" + + + GNU Wget + + + GNU GPL + http://www.gnu.org/licenses/gpl.html + + Wget Test Files + 1.2.3 + Wget Test Files description + + + + {{FILE1_HASH}} + + + http://{{SRV_HOST}}:{{SRV_PORT}}/File1_lowPref + http://{{SRV_HOST}}:{{SRV_PORT}}/File1 + + + + + {{FILE2_HASH}} + + + http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file + http://{{SRV_HOST}}:{{SRV_PORT}}/404 + + + + + {{FILE3_HASH}} + + + http://{{SRV_HOST}}:{{SRV_PORT}}/File3_lowPref + http://{{SRV_HOST}}:{{SRV_PORT}}/File3 + + + + + {{FILE4_HASH}} + + + http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file + + + + + {{FILE5_HASH}} + + + http://{{SRV_HOST}}:{{SRV_PORT}}/File5_lowPref + http://{{SRV_HOST}}:{{SRV_PORT}}/File5 + + + + +""" + +wrong_file = WgetFile ("wrong_file", bad) + +# partial File1_down to continue +File0_part = WgetFile ("File1", File0) + +File1_orig = WgetFile ("File1", File1) +File1_down = WgetFile ("File1", File1) +File1_nono = WgetFile ("File1_lowPref", File1_lowPref) + +# no good resources on purpose, this file shall be kept +File2_ouch = WgetFile ("File2", bad) + +File3_orig = WgetFile ("File3", File3) +File3_down = WgetFile ("File3", File3) +File3_nono = WgetFile ("File3_lowPref", File3_lowPref) + +# no good resources on purpose, this file shall be kept +File4_ouch = WgetFile ("File4", bad) + +File5_orig = WgetFile ("File5", File5) +File5_down = WgetFile ("File5", File5) +File5_nono = WgetFile ("File5_lowPref", File5_lowPref) + +MetaFile = WgetFile ("test.metalink", MetaXml) + +WGET_OPTIONS = "--continue --input-metalink test.metalink" +WGET_URLS = [[]] + +Files = [[ + wrong_file, + File1_orig, File1_nono, + File3_orig, File3_nono, + File5_orig, File5_nono +]] +Existing_Files = [ + File0_part, # partial File1_down to continue + File2_ouch, # wrong but fully downloaded file + File3_down, # right and fully downloaded file + File4_ouch, # wrong but fully downloaded file + MetaFile +] + +ExpectedReturnCode = 1 +ExpectedDownloadedFiles = [ + File1_down, # continued file from File0_part + File2_ouch, # wrong but fully downloaded file + File3_down, # right and fully downloaded file + File4_ouch, # wrong but fully downloaded file + File5_down, # newly fully donwloaded file + MetaFile +] + +################ Pre and Post Test Hooks ##################################### +pre_test = { + "ServerFiles" : Files, + "LocalFiles" : Existing_Files +} +test_options = { + "WgetCommands" : WGET_OPTIONS, + "Urls" : WGET_URLS +} +post_test = { + "ExpectedFiles" : ExpectedDownloadedFiles, + "ExpectedRetcode" : ExpectedReturnCode +} + +http_test = HTTPTest ( + pre_hook=pre_test, + test_params=test_options, + post_hook=post_test, +) + +http_test.server_setup() +### Get and use dynamic server sockname +srv_host, srv_port = http_test.servers[0].server_inst.socket.getsockname () + +MetaXml = MetaXml.replace('{{FILE1_HASH}}', File1_sha256) +MetaXml = MetaXml.replace('{{FILE2_HASH}}', File2_sha256) +MetaXml = MetaXml.replace('{{FILE3_HASH}}', File3_sha256) +MetaXml = MetaXml.replace('{{FILE4_HASH}}', File4_sha256) +MetaXml = MetaXml.replace('{{FILE5_HASH}}', File5_sha256) +MetaXml = MetaXml.replace('{{SRV_HOST}}', srv_host) +MetaXml = MetaXml.replace('{{SRV_PORT}}', str (srv_port)) +MetaFile.content = MetaXml + +err = http_test.begin () + +exit (err)