/* Return non-zero if C is one of the blanks stripped from metalink
   strings: space, tab, carriage return or line feed.  */
static int
metalink_is_blank (int c)
{
  return c == ' ' || c == '\t' || c == '\r' || c == '\n';
}

/*
  Remove the string's leading/trailing whitespace and line breaks.

  STR is the address of a NUL-terminated, writable string.  A NULL STR
  or a NULL *STR is accepted and ignored.

  The string is permanently modified: the kept text is moved to the
  front of the existing buffer and NUL-terminated there, so *STR keeps
  pointing at the same storage.

  What is kept:
    - leading ' ', '\t', '\r' and '\n' are skipped;
    - the value then runs up to the first '\r' or '\n' (or the end of
      the string), so spaces inside the value survive and anything
      after that line break is discarded;
    - ' ' and '\t' immediately before that point are dropped.

  An empty or all-blank input yields the empty string.

  retrieve_from_metalink () applies this to each resource URL
  (mres->url) before the resource is examined.
*/
void
clean_metalink_string (char **str)
{
  char *beg, *end, *dst;

  if (!str || !*str)
    return;

  /* Skip leading blanks.  NUL is not a blank, so this stops at the
     end of the string at the latest.  */
  beg = *str;
  while (metalink_is_blank (*beg))
    beg++;

  /* To not truncate a string containing spaces, search the first '\r'
     or '\n' which hypothetically marks the end of the value.  */
  end = beg;
  while (*end && *end != '\r' && *end != '\n')
    end++;

  /* Drop trailing blanks.  The scan is bounded by BEG so that an empty
     or all-blank value never reads before the start of the text.  */
  while (end > beg && metalink_is_blank (end[-1]))
    end--;

  /* Compact in place.  DST never runs ahead of BEG, so a forward copy
     is safe even though the two ranges overlap.  */
  dst = *str;
  while (beg < end)
    *dst++ = *beg++;
  *dst = '\0';
}
*/ void diff --git a/src/metalink.h b/src/metalink.h index c9dd73ed..4095262e 100644 --- a/src/metalink.h +++ b/src/metalink.h @@ -52,6 +52,7 @@ int metalink_check_safe_path (const char *path); char *last_component (char const *name); char *get_metalink_basename (char *name); void append_suffix_number (char **str, const char *sep, wgint num); +void clean_metalink_string (char **str); void badhash_suffix (char *name); void badhash_or_remove (char *name); diff --git a/testenv/Makefile.am b/testenv/Makefile.am index e6f9a239..4ad7d0a0 100644 --- a/testenv/Makefile.am +++ b/testenv/Makefile.am @@ -46,7 +46,8 @@ if METALINK_IS_ENABLED Test-metalink-xml-absprefix-trust.py \ Test-metalink-xml-homeprefix-trust.py \ Test-metalink-xml-size.py \ - Test-metalink-xml-nourls.py + Test-metalink-xml-nourls.py \ + Test-metalink-xml-urlbreak.py else METALINK_TESTS = endif diff --git a/testenv/Test-metalink-xml-urlbreak.py b/testenv/Test-metalink-xml-urlbreak.py new file mode 100755 index 00000000..e53ae116 --- /dev/null +++ b/testenv/Test-metalink-xml-urlbreak.py @@ -0,0 +1,236 @@ +#!/usr/bin/env python3 +from sys import exit +from test.http_test import HTTPTest +from misc.wget_file import WgetFile +import hashlib + +""" + This is to test Metalink/XML white spaces in url resources. + + With --trust-server-names, trust the metalink:file names. + + Without --trust-server-names, don't trust the metalink:file names: + use the basename of --input-metalink, and add a sequential number + (e.g. .#1, .#2, etc.). + + Strip the directory from unsafe paths. +""" +############# File Definitions ############################################### +bad = "Ouch!" + +File1 = "Would you like some Tea?" 
+File1_lowPref = "Do not take this" +File1_sha256 = hashlib.sha256 (File1.encode ('UTF-8')).hexdigest () + +File2 = "This is gonna be good" +File2_lowPref = "Not this one too" +File2_sha256 = hashlib.sha256 (File2.encode ('UTF-8')).hexdigest () + +File3 = "A little more, please" +File3_lowPref = "That's just too much" +File3_sha256 = hashlib.sha256 (File3.encode ('UTF-8')).hexdigest () + +File4 = "Maybe a biscuit?" +File4_lowPref = "No, thanks" +File4_sha256 = hashlib.sha256 (File4.encode ('UTF-8')).hexdigest () + +File5 = "More Tea...?" +File5_lowPref = "I have to go..." +File5_sha256 = hashlib.sha256 (File5.encode ('UTF-8')).hexdigest () + +MetaXml = \ +"""<?xml version="1.0" encoding="utf-8"?> +<metalink version="3.0" xmlns="http://www.metalinker.org/"> + <publisher> + <name>GNU Wget</name> + </publisher> + <license> + <name>GNU GPL</name> + <url>http://www.gnu.org/licenses/gpl.html</url> + </license> + <identity>Wget Test Files</identity> + <version>1.2.3</version> + <description>Wget Test Files description</description> + <files> + <file name="File1"> + <verification> + <hash type="sha256">{{FILE1_HASH}}</hash> + </verification> + <resources> + <url type="http" preference="35"> + http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file + </url> + <url type="http" preference="40"> + http://{{SRV_HOST}}:{{SRV_PORT}}/404 + </url> + <url type="http" preference="25"> + http://{{SRV_HOST}}:{{SRV_PORT}}/File1_lowPref + </url> + <url type="http" preference="30"> + http://{{SRV_HOST}}:{{SRV_PORT}}/File1 + </url> + </resources> + </file> + <file name="File2"> + <verification> + <hash type="sha256">{{FILE2_HASH}}</hash> + </verification> + <resources> + <url type="http" preference="35"> + http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file + </url> + <url type="http" preference="40"> + http://{{SRV_HOST}}:{{SRV_PORT}}/404 + </url> + <url type="http" preference="25"> + http://{{SRV_HOST}}:{{SRV_PORT}}/File2_lowPref + </url> + <url type="http" preference="30"> + 
http://{{SRV_HOST}}:{{SRV_PORT}}/File2 + </url> + </resources> + </file> + <file name="File3"> + <verification> + <hash type="sha256">{{FILE3_HASH}}</hash> + </verification> + <resources> + <url type="http" preference="35"> + http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file + </url> + <url type="http" preference="40"> + http://{{SRV_HOST}}:{{SRV_PORT}}/404 + </url> + <url type="http" preference="25"> + http://{{SRV_HOST}}:{{SRV_PORT}}/File3_lowPref + </url> + <url type="http" preference="30"> + http://{{SRV_HOST}}:{{SRV_PORT}}/File3 + </url> + </resources> + </file> + <file name="File4"> + <verification> + <hash type="sha256">{{FILE4_HASH}}</hash> + </verification> + <resources> + <url type="http" preference="35"> + http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file + </url> + <url type="http" preference="40"> + http://{{SRV_HOST}}:{{SRV_PORT}}/404 + </url> + <url type="http" preference="25"> + http://{{SRV_HOST}}:{{SRV_PORT}}/File4_lowPref + </url> + <url type="http" preference="30"> + http://{{SRV_HOST}}:{{SRV_PORT}}/File4 + </url> + </resources> + </file> + <file name="File5"> + <verification> + <hash type="sha256">{{FILE5_HASH}}</hash> + </verification> + <resources> + <url type="http" preference="35"> + http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file + </url> + <url type="http" preference="40"> + http://{{SRV_HOST}}:{{SRV_PORT}}/404 + </url> + <url type="http" preference="25"> + http://{{SRV_HOST}}:{{SRV_PORT}}/File5_lowPref + </url> + <url type="http" preference="30"> + http://{{SRV_HOST}}:{{SRV_PORT}}/File5 + </url> + </resources> + </file> + </files> +</metalink> +""" + +wrong_file = WgetFile ("wrong_file", bad) + +File1_orig = WgetFile ("File1", File1) +File1_down = WgetFile ("test.metalink.#1", File1) +File1_nono = WgetFile ("File1_lowPref", File1_lowPref) + +File2_orig = WgetFile ("File2", File2) +File2_down = WgetFile ("test.metalink.#2", File2) +File2_nono = WgetFile ("File2_lowPref", File2_lowPref) + +File3_orig = WgetFile ("File3", File3) +File3_down = WgetFile 
("test.metalink.#3", File3) +File3_nono = WgetFile ("File3_lowPref", File3_lowPref) + +File4_orig = WgetFile ("File4", File4) +File4_down = WgetFile ("test.metalink.#4", File4) +File4_nono = WgetFile ("File4_lowPref", File4_lowPref) + +File5_orig = WgetFile ("File5", File5) +File5_down = WgetFile ("test.metalink.#5", File5) +File5_nono = WgetFile ("File5_lowPref", File5_lowPref) + +MetaFile = WgetFile ("test.metalink", MetaXml) + +WGET_OPTIONS = "--input-metalink test.metalink" +WGET_URLS = [[]] + +Files = [[ + wrong_file, + File1_orig, File1_nono, + File2_orig, File2_nono, + File3_orig, File3_nono, + File4_orig, File4_nono, + File5_orig, File5_nono +]] +Existing_Files = [MetaFile] + +ExpectedReturnCode = 0 +ExpectedDownloadedFiles = [ + File1_down, + File2_down, + File3_down, + File4_down, + File5_down, + MetaFile +] + +################ Pre and Post Test Hooks ##################################### +pre_test = { + "ServerFiles" : Files, + "LocalFiles" : Existing_Files +} +test_options = { + "WgetCommands" : WGET_OPTIONS, + "Urls" : WGET_URLS +} +post_test = { + "ExpectedFiles" : ExpectedDownloadedFiles, + "ExpectedRetcode" : ExpectedReturnCode +} + +http_test = HTTPTest ( + pre_hook=pre_test, + test_params=test_options, + post_hook=post_test, +) + +http_test.server_setup() +### Get and use dynamic server sockname +srv_host, srv_port = http_test.servers[0].server_inst.socket.getsockname () + +MetaXml = MetaXml.replace('{{FILE1_HASH}}', File1_sha256) +MetaXml = MetaXml.replace('{{FILE2_HASH}}', File2_sha256) +MetaXml = MetaXml.replace('{{FILE3_HASH}}', File3_sha256) +MetaXml = MetaXml.replace('{{FILE4_HASH}}', File4_sha256) +MetaXml = MetaXml.replace('{{FILE5_HASH}}', File5_sha256) +MetaXml = MetaXml.replace('{{SRV_HOST}}', srv_host) +MetaXml = MetaXml.replace('{{SRV_PORT}}', str (srv_port)) +MetaFile.content = MetaXml + +err = http_test.begin () + +exit (err)