From f3f349a0cf0598fdf3a40d909075974c7bc992bc Mon Sep 17 00:00:00 2001 From: Matthew White Date: Tue, 16 Aug 2016 23:12:56 +0200 Subject: [PATCH] Add file size computation in Metalink module * NEWS: Mention Metalink's file size verification * src/metalink.c (retrieve_from_metalink): Add file size computation * doc/metalink.txt: Update document. Remove resolved bugs Reject downloaded files when they do not agree with their Metalink/XML metalink:size: https://tools.ietf.org/html/rfc5854#section-4.2.14 At the moment of writing, Metalink/HTTP headers do not provide a file size field. This information could be obtained from the Content-Length header field: https://tools.ietf.org/html/rfc6249#section-7 --- NEWS | 4 ++++ doc/metalink.txt | 11 ++++------- src/metalink.c | 35 +++++++++++++++++++++++++++++++++++ 3 files changed, 43 insertions(+), 7 deletions(-) diff --git a/NEWS b/NEWS index 60112506..04718d59 100644 --- a/NEWS +++ b/NEWS @@ -9,6 +9,10 @@ Please send GNU Wget bug reports to . * Changes in Wget X.Y.Z +* When processing a Metalink file, reject downloaded files which don't + agree with their own metalink:size value: + https://tools.ietf.org/html/rfc5854#section-4.2.16 + * When processing a Metalink file, with --continue resume partially downloaded files and keep fully downloaded files even if they fail the verification. diff --git a/doc/metalink.txt b/doc/metalink.txt index 0f3706a9..904ef2e0 100644 --- a/doc/metalink.txt +++ b/doc/metalink.txt @@ -67,7 +67,8 @@ References: When a Metalink/XML file is parsed: 1. create the metalink:file "path/file" tree; 2. download the metalink:url file as "path/file"; -3. verify the "path/file" checksum. +3. verify the "path/file" size, if declared; +4. verify the "path/file" checksum. All the above conform to the RFC5854 standard. @@ -79,11 +80,6 @@ References: If more metalink:file elements are the same, wget downloads them all. -4.4 Bugs -======== - -The download is OK even when metalink:file size is wrong. - 5. `wget --metalink-over-http` ****************************** @@ -107,7 +103,8 @@ References: When a Metalink/HTTP header is parsed: 1. extract metalink metadata from the header; 2. download the file from the mirror with the highest priority; -3. verify the file's checksum. +3. verify the file's size, if declared; +4. verify the file's checksum. All the above comform to the usual Wget's download behaviour and to the RFC6249 standard. diff --git a/src/metalink.c b/src/metalink.c index 03a0bb13..ee0ed085 100644 --- a/src/metalink.c +++ b/src/metalink.c @@ -240,6 +240,41 @@ retrieve_from_metalink (const metalink_t* metalink) continue; } + logprintf (LOG_VERBOSE, _("Computing size for %s\n"), quote (filename)); + + if (!mfile->size) + { + logprintf (LOG_VERBOSE, _("File size not declared. Skipping check.\n")); + } + else + { + wgint local_file_size = file_size (filename); + + if (local_file_size == -1) + { + logprintf (LOG_NOTQUIET, _("Could not get downloaded file's size.\n")); + fclose (local_file); + local_file = NULL; + continue; + } + + /* FIXME: what about int64? */ + DEBUGP (("Declared size: %lld\n", mfile->size)); + DEBUGP (("Computed size: %lld\n", (long long) local_file_size)); + + if (local_file_size != (wgint) mfile->size) + { + logprintf (LOG_NOTQUIET, _("Size mismatch for file %s.\n"), quote (filename)); + fclose (local_file); + local_file = NULL; + continue; + } + else + { + logputs (LOG_VERBOSE, _("Size matches.\n")); + } + } + for (mchksum_ptr = mfile->checksums; *mchksum_ptr; mchksum_ptr++) { char md2[MD2_DIGEST_SIZE];