diff --git a/doc/metalink-standard.txt b/doc/metalink-standard.txt index 4836a852..d54e83e8 100644 --- a/doc/metalink-standard.txt +++ b/doc/metalink-standard.txt @@ -86,6 +86,10 @@ the mother URL is trusted. Any Metalink/HTTP application/metalink4+xml file is saved using the basename of its own Link header "name" field, if available. +In conjunction with the option --content-disposition, a 'Content-Type: +application/metalink4+xml' file is saved using the basename of its own +Content-Disposition header "filename" field, if available. + 3.1.2 The final name ==================== diff --git a/doc/wget.texi b/doc/wget.texi index 8cf32303..f42773ee 100644 --- a/doc/wget.texi +++ b/doc/wget.texi @@ -523,6 +523,7 @@ without overwriting existing files. Issues HTTP HEAD request instead of GET and extracts Metalink metadata from response headers. Then it switches to Metalink download. If no valid Metalink metadata is found, it falls back to ordinary HTTP download. +Enables @samp{Content-Type: application/metalink4+xml} files download/processing. @cindex metalink-index @item --metalink-index=@var{number} @@ -1686,6 +1687,10 @@ This option is useful for some file-downloading CGI programs that use @code{Content-Disposition} headers to describe what the name of a downloaded file should be. +When combined with @samp{--metalink-over-http} and @samp{--trust-server-names}, +a @samp{Content-Type: application/metalink4+xml} file is named using the +@code{Content-Disposition} filename field, if available. + @cindex Content On Error @item --content-on-error diff --git a/src/http.c b/src/http.c index 8fdf49db..89d496b6 100644 --- a/src/http.c +++ b/src/http.c @@ -2570,6 +2570,87 @@ metalink_from_http (const struct response *resp, const struct http_stat *hs, mfile->resources = xnew0 (metalink_resource_t *); mfile->metaurls = xnew0 (metalink_metaurl_t *); + /* Process the Content-Type header. */ + if (resp_header_locate (resp, "Content-Type", 0, &val_beg, &val_end) != -1) + { + metalink_metaurl_t murl = {0}; + + const char *type_beg, *type_end; + char *typestr = NULL; + char *namestr = NULL; + size_t type_len; + + DEBUGP (("Processing Content-Type header...\n")); + + /* Find beginning of type. */ + type_beg = val_beg; + while (type_beg < val_end && c_isspace (*type_beg)) + type_beg++; + + /* Find end of type. */ + type_end = type_beg + 1; + while (type_end < val_end && + *type_end != ';' && + *type_end != ' ' && + *type_end != '\r' && + *type_end != '\n') + type_end++; + + if (type_beg >= val_end || type_end > val_end) + { + DEBUGP (("Invalid Content-Type header. Ignoring.\n")); + goto skip_content_type; + } + + type_len = type_end - type_beg; + typestr = xstrndup (type_beg, type_len); + + DEBUGP (("Content-Type: %s\n", typestr)); + + if (strcmp (typestr, "application/metalink4+xml")) + { + xfree (typestr); + goto skip_content_type; + } + + /* + Valid ranges for the "pri" attribute are from + 1 to 999999. Mirror servers with a lower value of the "pri" + attribute have a higher priority, while mirrors with an undefined + "pri" attribute are considered to have a value of 999999, which is + the lowest priority. + + rfc6249 section 3.1 + */ + murl.priority = DEFAULT_PRI; + + murl.mediatype = typestr; + typestr = NULL; + + if (opt.content_disposition + && resp_header_locate (resp, "Content-Disposition", 0, &val_beg, &val_end) != -1) + { + find_key_value (val_beg, val_end, "filename", &namestr); + murl.name = namestr; + namestr = NULL; + } + + murl.url = xstrdup (u->url); + + DEBUGP (("URL=%s\n", murl.url)); + DEBUGP (("MEDIATYPE=%s\n", murl.mediatype)); + DEBUGP (("NAME=%s\n", murl.name ? murl.name : "")); + DEBUGP (("PRIORITY=%d\n", murl.priority)); + + /* 1 slot from new resource, 1 slot for null-termination. */ + mfile->metaurls = xrealloc (mfile->metaurls, + sizeof (metalink_metaurl_t *) * (meta_count + 2)); + mfile->metaurls[meta_count] = xnew0 (metalink_metaurl_t); + *mfile->metaurls[meta_count] = murl; + meta_count++; + } +skip_content_type: + /* Find all Link headers. */ for (i = 0; (i = resp_header_locate (resp, "Link", i, &val_beg, &val_end)) != -1; diff --git a/testenv/Makefile.am b/testenv/Makefile.am index a82a925a..8f619072 100644 --- a/testenv/Makefile.am +++ b/testenv/Makefile.am @@ -33,6 +33,10 @@ if METALINK_IS_ENABLED Test-metalink-http-xml.py \ Test-metalink-http-xml-trust.py \ Test-metalink-http-xml-trust-name.py \ + Test-metalink-http-xml-type.py \ + Test-metalink-http-xml-type-trust.py \ + Test-metalink-http-xml-type-content.py \ + Test-metalink-http-xml-type-trust-content.py \ Test-metalink-xml.py \ Test-metalink-xml-continue.py \ Test-metalink-xml-relpath.py \ diff --git a/testenv/Test-metalink-http-xml-type-content.py b/testenv/Test-metalink-http-xml-type-content.py new file mode 100755 index 00000000..b1ace278 --- /dev/null +++ b/testenv/Test-metalink-http-xml-type-content.py @@ -0,0 +1,221 @@ +#!/usr/bin/env python3 +from sys import exit +from test.http_test import HTTPTest +from misc.wget_file import WgetFile +import hashlib +from base64 import b64encode + +""" + This is to test Metalink/HTTP with Content-Type Metalink/XML. + + With --trust-server-names, trust the metalink:file names. + + Without --trust-server-names, don't trust the metalink:file names: + use the basename of --input-metalink, and add a sequential number + (e.g. .#1, .#2, etc.). + + Strip the directory from unsafe paths. +""" + +############# File Definitions ############################################### +bad = "Ouch!" + +File1 = "Would you like some Tea?" +File1_lowPref = "Do not take this" +File1_sha256 = hashlib.sha256 (File1.encode ('UTF-8')).hexdigest () + +File2 = "This is gonna be good" +File2_lowPref = "Not this one too" +File2_sha256 = hashlib.sha256 (File2.encode ('UTF-8')).hexdigest () + +File3 = "A little more, please" +File3_lowPref = "That's just too much" +File3_sha256 = hashlib.sha256 (File3.encode ('UTF-8')).hexdigest () + +File4 = "Maybe a biscuit?" +File4_lowPref = "No, thanks" +File4_sha256 = hashlib.sha256 (File4.encode ('UTF-8')).hexdigest () + +File5 = "More Tea...?" +File5_lowPref = "I have to go..." +File5_sha256 = hashlib.sha256 (File5.encode ('UTF-8')).hexdigest () + +MetaXml = \ +""" + + + GNU Wget + + + GNU GPL + http://www.gnu.org/licenses/gpl.html + + Wget Test Files + 1.2.3 + Wget Test Files description + + + + {{FILE1_HASH}} + + + http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file + http://{{SRV_HOST}}:{{SRV_PORT}}/404 + http://{{SRV_HOST}}:{{SRV_PORT}}/File1_lowPref + http://{{SRV_HOST}}:{{SRV_PORT}}/File1 + + + + + {{FILE2_HASH}} + + + http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file + http://{{SRV_HOST}}:{{SRV_PORT}}/404 + http://{{SRV_HOST}}:{{SRV_PORT}}/File2_lowPref + http://{{SRV_HOST}}:{{SRV_PORT}}/File2 + + + + + {{FILE3_HASH}} + + + http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file + http://{{SRV_HOST}}:{{SRV_PORT}}/404 + http://{{SRV_HOST}}:{{SRV_PORT}}/File3_lowPref + http://{{SRV_HOST}}:{{SRV_PORT}}/File3 + + + + + {{FILE4_HASH}} + + + http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file + http://{{SRV_HOST}}:{{SRV_PORT}}/404 + http://{{SRV_HOST}}:{{SRV_PORT}}/File4_lowPref + http://{{SRV_HOST}}:{{SRV_PORT}}/File4 + + + + + {{FILE5_HASH}} + + + http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file + http://{{SRV_HOST}}:{{SRV_PORT}}/404 + http://{{SRV_HOST}}:{{SRV_PORT}}/File5_lowPref + http://{{SRV_HOST}}:{{SRV_PORT}}/File5 + + + + +""" + +# This will be filled as soon as we know server hostname and port +MetaHTTPRules = {'SendHeader' : {}} + +MetaHTTP = WgetFile ("main.metalink", rules=MetaHTTPRules) +MetaHTTP_down = WgetFile ("main.metalink.meta#1", MetaXml) + +wrong_file = WgetFile ("wrong_file", bad) + +File1_orig = WgetFile ("File1", File1) +File1_down = WgetFile ("main.metalink.meta#1.#1", File1) +File1_nono = WgetFile ("File1_lowPref", File1_lowPref) + +File2_orig = WgetFile ("File2", File2) +File2_down = WgetFile ("main.metalink.meta#1.#2", File2) +File2_nono = WgetFile ("File2_lowPref", File2_lowPref) + +# rejected by libmetalink +File3_orig = WgetFile ("File3", File3) +File3_nono = WgetFile ("File3_lowPref", File3_lowPref) + +File4_orig = WgetFile ("File4", File4) +File4_down = WgetFile ("main.metalink.meta#1.#3", File4) +File4_nono = WgetFile ("File4_lowPref", File4_lowPref) + +File5_orig = WgetFile ("File5", File5) +File5_down = WgetFile ("main.metalink.meta#1.#4", File5) +File5_nono = WgetFile ("File5_lowPref", File5_lowPref) + +WGET_OPTIONS = "--metalink-over-http --content-disposition --metalink-index=0" +WGET_URLS = [["main.metalink"]] + +RequestList = [[ + "HEAD /main.metalink", + "GET /main.metalink", + "GET /404", + "GET /wrong_file", + "GET /File1", + "GET /File2", + "GET /File4", + "GET /File5" +]] + +Files = [[ + MetaHTTP, + wrong_file, + File1_orig, File1_nono, + File2_orig, File2_nono, + File3_orig, File3_nono, + File4_orig, File4_nono, + File5_orig, File5_nono +]] +Existing_Files = [] + +ExpectedReturnCode = 0 +ExpectedDownloadedFiles = [ + MetaHTTP_down, + File1_down, + File2_down, + File4_down, + File5_down +] + +################ Pre and Post Test Hooks ##################################### +pre_test = { + "ServerFiles" : Files, + "LocalFiles" : Existing_Files +} +test_options = { + "WgetCommands" : WGET_OPTIONS, + "Urls" : WGET_URLS +} +post_test = { + "ExpectedFiles" : ExpectedDownloadedFiles, + "ExpectedRetcode" : ExpectedReturnCode, + "FilesCrawled" : RequestList +} + +http_test = HTTPTest ( + pre_hook=pre_test, + test_params=test_options, + post_hook=post_test +) + +http_test.server_setup() +### Get and use dynamic server sockname +srv_host, srv_port = http_test.servers[0].server_inst.socket.getsockname () + +MetaXml = MetaXml.replace('{{FILE1_HASH}}', File1_sha256) +MetaXml = MetaXml.replace('{{FILE2_HASH}}', File2_sha256) +MetaXml = MetaXml.replace('{{FILE3_HASH}}', File3_sha256) +MetaXml = MetaXml.replace('{{FILE4_HASH}}', File4_sha256) +MetaXml = MetaXml.replace('{{FILE5_HASH}}', File5_sha256) +MetaXml = MetaXml.replace('{{SRV_HOST}}', srv_host) +MetaXml = MetaXml.replace('{{SRV_PORT}}', str (srv_port)) +MetaHTTP_down.content = MetaXml + +MetaHTTP.content = MetaXml + +MetaHTTPRules["SendHeader"] = { + 'Content-Type': 'application/metalink4+xml', + 'Content-Disposition': 'filename="newname.metalink"' +} + +err = http_test.begin () + +exit (err) diff --git a/testenv/Test-metalink-http-xml-type-trust-content.py b/testenv/Test-metalink-http-xml-type-trust-content.py new file mode 100755 index 00000000..7ee41974 --- /dev/null +++ b/testenv/Test-metalink-http-xml-type-trust-content.py @@ -0,0 +1,221 @@ +#!/usr/bin/env python3 +from sys import exit +from test.http_test import HTTPTest +from misc.wget_file import WgetFile +import hashlib +from base64 import b64encode + +""" + This is to test Metalink/HTTP with Content-Type Metalink/XML. + + With --trust-server-names, trust the metalink:file names. + + Without --trust-server-names, don't trust the metalink:file names: + use the basename of --input-metalink, and add a sequential number + (e.g. .#1, .#2, etc.). + + Strip the directory from unsafe paths. +""" + +############# File Definitions ############################################### +bad = "Ouch!" + +File1 = "Would you like some Tea?" +File1_lowPref = "Do not take this" +File1_sha256 = hashlib.sha256 (File1.encode ('UTF-8')).hexdigest () + +File2 = "This is gonna be good" +File2_lowPref = "Not this one too" +File2_sha256 = hashlib.sha256 (File2.encode ('UTF-8')).hexdigest () + +File3 = "A little more, please" +File3_lowPref = "That's just too much" +File3_sha256 = hashlib.sha256 (File3.encode ('UTF-8')).hexdigest () + +File4 = "Maybe a biscuit?" +File4_lowPref = "No, thanks" +File4_sha256 = hashlib.sha256 (File4.encode ('UTF-8')).hexdigest () + +File5 = "More Tea...?" +File5_lowPref = "I have to go..." +File5_sha256 = hashlib.sha256 (File5.encode ('UTF-8')).hexdigest () + +MetaXml = \ +""" + + + GNU Wget + + + GNU GPL + http://www.gnu.org/licenses/gpl.html + + Wget Test Files + 1.2.3 + Wget Test Files description + + + + {{FILE1_HASH}} + + + http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file + http://{{SRV_HOST}}:{{SRV_PORT}}/404 + http://{{SRV_HOST}}:{{SRV_PORT}}/File1_lowPref + http://{{SRV_HOST}}:{{SRV_PORT}}/File1 + + + + + {{FILE2_HASH}} + + + http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file + http://{{SRV_HOST}}:{{SRV_PORT}}/404 + http://{{SRV_HOST}}:{{SRV_PORT}}/File2_lowPref + http://{{SRV_HOST}}:{{SRV_PORT}}/File2 + + + + + {{FILE3_HASH}} + + + http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file + http://{{SRV_HOST}}:{{SRV_PORT}}/404 + http://{{SRV_HOST}}:{{SRV_PORT}}/File3_lowPref + http://{{SRV_HOST}}:{{SRV_PORT}}/File3 + + + + + {{FILE4_HASH}} + + + http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file + http://{{SRV_HOST}}:{{SRV_PORT}}/404 + http://{{SRV_HOST}}:{{SRV_PORT}}/File4_lowPref + http://{{SRV_HOST}}:{{SRV_PORT}}/File4 + + + + + {{FILE5_HASH}} + + + http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file + http://{{SRV_HOST}}:{{SRV_PORT}}/404 + http://{{SRV_HOST}}:{{SRV_PORT}}/File5_lowPref + http://{{SRV_HOST}}:{{SRV_PORT}}/File5 + + + + +""" + +# This will be filled as soon as we know server hostname and port +MetaHTTPRules = {'SendHeader' : {}} + +MetaHTTP = WgetFile ("main.metalink", rules=MetaHTTPRules) +MetaHTTP_down = WgetFile ("newname.metalink", MetaXml) + +wrong_file = WgetFile ("wrong_file", bad) + +File1_orig = WgetFile ("File1", File1) +File1_down = WgetFile ("dir/File1", File1) +File1_nono = WgetFile ("File1_lowPref", File1_lowPref) + +File2_orig = WgetFile ("File2", File2) +File2_down = WgetFile ("dir/File2", File2) +File2_nono = WgetFile ("File2_lowPref", File2_lowPref) + +# rejected by libmetalink +File3_orig = WgetFile ("File3", File3) +File3_nono = WgetFile ("File3_lowPref", File3_lowPref) + +File4_orig = WgetFile ("File4", File4) +File4_down = WgetFile ("dir/File4", File4) +File4_nono = WgetFile ("File4_lowPref", File4_lowPref) + +File5_orig = WgetFile ("File5", File5) +File5_down = WgetFile ("dir/File5", File5) +File5_nono = WgetFile ("File5_lowPref", File5_lowPref) + +WGET_OPTIONS = "--trust-server-names --metalink-over-http --content-disposition --metalink-index=0" +WGET_URLS = [["main.metalink"]] + +RequestList = [[ + "HEAD /main.metalink", + "GET /main.metalink", + "GET /404", + "GET /wrong_file", + "GET /File1", + "GET /File2", + "GET /File4", + "GET /File5" +]] + +Files = [[ + MetaHTTP, + wrong_file, + File1_orig, File1_nono, + File2_orig, File2_nono, + File3_orig, File3_nono, + File4_orig, File4_nono, + File5_orig, File5_nono +]] +Existing_Files = [] + +ExpectedReturnCode = 0 +ExpectedDownloadedFiles = [ + MetaHTTP_down, + File1_down, + File2_down, + File4_down, + File5_down +] + +################ Pre and Post Test Hooks ##################################### +pre_test = { + "ServerFiles" : Files, + "LocalFiles" : Existing_Files +} +test_options = { + "WgetCommands" : WGET_OPTIONS, + "Urls" : WGET_URLS +} +post_test = { + "ExpectedFiles" : ExpectedDownloadedFiles, + "ExpectedRetcode" : ExpectedReturnCode, + "FilesCrawled" : RequestList +} + +http_test = HTTPTest ( + pre_hook=pre_test, + test_params=test_options, + post_hook=post_test +) + +http_test.server_setup() +### Get and use dynamic server sockname +srv_host, srv_port = http_test.servers[0].server_inst.socket.getsockname () + +MetaXml = MetaXml.replace('{{FILE1_HASH}}', File1_sha256) +MetaXml = MetaXml.replace('{{FILE2_HASH}}', File2_sha256) +MetaXml = MetaXml.replace('{{FILE3_HASH}}', File3_sha256) +MetaXml = MetaXml.replace('{{FILE4_HASH}}', File4_sha256) +MetaXml = MetaXml.replace('{{FILE5_HASH}}', File5_sha256) +MetaXml = MetaXml.replace('{{SRV_HOST}}', srv_host) +MetaXml = MetaXml.replace('{{SRV_PORT}}', str (srv_port)) +MetaHTTP_down.content = MetaXml + +MetaHTTP.content = MetaXml + +MetaHTTPRules["SendHeader"] = { + 'Content-Type': 'application/metalink4+xml', + 'Content-Disposition': 'filename="newname.metalink"' +} + +err = http_test.begin () + +exit (err) diff --git a/testenv/Test-metalink-http-xml-type-trust.py b/testenv/Test-metalink-http-xml-type-trust.py new file mode 100755 index 00000000..0cf20746 --- /dev/null +++ b/testenv/Test-metalink-http-xml-type-trust.py @@ -0,0 +1,221 @@ +#!/usr/bin/env python3 +from sys import exit +from test.http_test import HTTPTest +from misc.wget_file import WgetFile +import hashlib +from base64 import b64encode + +""" + This is to test Metalink/HTTP with Content-Type Metalink/XML. + + With --trust-server-names, trust the metalink:file names. + + Without --trust-server-names, don't trust the metalink:file names: + use the basename of --input-metalink, and add a sequential number + (e.g. .#1, .#2, etc.). + + Strip the directory from unsafe paths. +""" + +############# File Definitions ############################################### +bad = "Ouch!" + +File1 = "Would you like some Tea?" +File1_lowPref = "Do not take this" +File1_sha256 = hashlib.sha256 (File1.encode ('UTF-8')).hexdigest () + +File2 = "This is gonna be good" +File2_lowPref = "Not this one too" +File2_sha256 = hashlib.sha256 (File2.encode ('UTF-8')).hexdigest () + +File3 = "A little more, please" +File3_lowPref = "That's just too much" +File3_sha256 = hashlib.sha256 (File3.encode ('UTF-8')).hexdigest () + +File4 = "Maybe a biscuit?" +File4_lowPref = "No, thanks" +File4_sha256 = hashlib.sha256 (File4.encode ('UTF-8')).hexdigest () + +File5 = "More Tea...?" +File5_lowPref = "I have to go..." +File5_sha256 = hashlib.sha256 (File5.encode ('UTF-8')).hexdigest () + +MetaXml = \ +""" + + + GNU Wget + + + GNU GPL + http://www.gnu.org/licenses/gpl.html + + Wget Test Files + 1.2.3 + Wget Test Files description + + + + {{FILE1_HASH}} + + + http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file + http://{{SRV_HOST}}:{{SRV_PORT}}/404 + http://{{SRV_HOST}}:{{SRV_PORT}}/File1_lowPref + http://{{SRV_HOST}}:{{SRV_PORT}}/File1 + + + + + {{FILE2_HASH}} + + + http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file + http://{{SRV_HOST}}:{{SRV_PORT}}/404 + http://{{SRV_HOST}}:{{SRV_PORT}}/File2_lowPref + http://{{SRV_HOST}}:{{SRV_PORT}}/File2 + + + + + {{FILE3_HASH}} + + + http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file + http://{{SRV_HOST}}:{{SRV_PORT}}/404 + http://{{SRV_HOST}}:{{SRV_PORT}}/File3_lowPref + http://{{SRV_HOST}}:{{SRV_PORT}}/File3 + + + + + {{FILE4_HASH}} + + + http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file + http://{{SRV_HOST}}:{{SRV_PORT}}/404 + http://{{SRV_HOST}}:{{SRV_PORT}}/File4_lowPref + http://{{SRV_HOST}}:{{SRV_PORT}}/File4 + + + + + {{FILE5_HASH}} + + + http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file + http://{{SRV_HOST}}:{{SRV_PORT}}/404 + http://{{SRV_HOST}}:{{SRV_PORT}}/File5_lowPref + http://{{SRV_HOST}}:{{SRV_PORT}}/File5 + + + + +""" + +# This will be filled as soon as we know server hostname and port +MetaHTTPRules = {'SendHeader' : {}} + +MetaHTTP = WgetFile ("main.metalink", rules=MetaHTTPRules) +MetaHTTP_down = WgetFile ("main.metalink", MetaXml) + +wrong_file = WgetFile ("wrong_file", bad) + +File1_orig = WgetFile ("File1", File1) +File1_down = WgetFile ("dir/File1", File1) +File1_nono = WgetFile ("File1_lowPref", File1_lowPref) + +File2_orig = WgetFile ("File2", File2) +File2_down = WgetFile ("dir/File2", File2) +File2_nono = WgetFile ("File2_lowPref", File2_lowPref) + +# rejected by libmetalink +File3_orig = WgetFile ("File3", File3) +File3_nono = WgetFile ("File3_lowPref", File3_lowPref) + +File4_orig = WgetFile ("File4", File4) +File4_down = WgetFile ("dir/File4", File4) +File4_nono = WgetFile ("File4_lowPref", File4_lowPref) + +File5_orig = WgetFile ("File5", File5) +File5_down = WgetFile ("dir/File5", File5) +File5_nono = WgetFile ("File5_lowPref", File5_lowPref) + +WGET_OPTIONS = "--trust-server-names --metalink-over-http --metalink-index=0" +WGET_URLS = [["main.metalink"]] + +RequestList = [[ + "HEAD /main.metalink", + "GET /main.metalink", + "GET /404", + "GET /wrong_file", + "GET /File1", + "GET /File2", + "GET /File4", + "GET /File5" +]] + +Files = [[ + MetaHTTP, + wrong_file, + File1_orig, File1_nono, + File2_orig, File2_nono, + File3_orig, File3_nono, + File4_orig, File4_nono, + File5_orig, File5_nono +]] +Existing_Files = [] + +ExpectedReturnCode = 0 +ExpectedDownloadedFiles = [ + MetaHTTP_down, + File1_down, + File2_down, + File4_down, + File5_down +] + +################ Pre and Post Test Hooks ##################################### +pre_test = { + "ServerFiles" : Files, + "LocalFiles" : Existing_Files +} +test_options = { + "WgetCommands" : WGET_OPTIONS, + "Urls" : WGET_URLS +} +post_test = { + "ExpectedFiles" : ExpectedDownloadedFiles, + "ExpectedRetcode" : ExpectedReturnCode, + "FilesCrawled" : RequestList +} + +http_test = HTTPTest ( + pre_hook=pre_test, + test_params=test_options, + post_hook=post_test +) + +http_test.server_setup() +### Get and use dynamic server sockname +srv_host, srv_port = http_test.servers[0].server_inst.socket.getsockname () + +MetaXml = MetaXml.replace('{{FILE1_HASH}}', File1_sha256) +MetaXml = MetaXml.replace('{{FILE2_HASH}}', File2_sha256) +MetaXml = MetaXml.replace('{{FILE3_HASH}}', File3_sha256) +MetaXml = MetaXml.replace('{{FILE4_HASH}}', File4_sha256) +MetaXml = MetaXml.replace('{{FILE5_HASH}}', File5_sha256) +MetaXml = MetaXml.replace('{{SRV_HOST}}', srv_host) +MetaXml = MetaXml.replace('{{SRV_PORT}}', str (srv_port)) +MetaHTTP_down.content = MetaXml + +MetaHTTP.content = MetaXml + +MetaHTTPRules["SendHeader"] = { + 'Content-Type': 'application/metalink4+xml', + 'Content-Disposition': 'filename="newname.metalink"' +} + +err = http_test.begin () + +exit (err) diff --git a/testenv/Test-metalink-http-xml-type.py b/testenv/Test-metalink-http-xml-type.py new file mode 100755 index 00000000..19a65f1e --- /dev/null +++ b/testenv/Test-metalink-http-xml-type.py @@ -0,0 +1,221 @@ +#!/usr/bin/env python3 +from sys import exit +from test.http_test import HTTPTest +from misc.wget_file import WgetFile +import hashlib +from base64 import b64encode + +""" + This is to test Metalink/HTTP with Content-Type Metalink/XML. + + With --trust-server-names, trust the metalink:file names. + + Without --trust-server-names, don't trust the metalink:file names: + use the basename of --input-metalink, and add a sequential number + (e.g. .#1, .#2, etc.). + + Strip the directory from unsafe paths. +""" + +############# File Definitions ############################################### +bad = "Ouch!" + +File1 = "Would you like some Tea?" +File1_lowPref = "Do not take this" +File1_sha256 = hashlib.sha256 (File1.encode ('UTF-8')).hexdigest () + +File2 = "This is gonna be good" +File2_lowPref = "Not this one too" +File2_sha256 = hashlib.sha256 (File2.encode ('UTF-8')).hexdigest () + +File3 = "A little more, please" +File3_lowPref = "That's just too much" +File3_sha256 = hashlib.sha256 (File3.encode ('UTF-8')).hexdigest () + +File4 = "Maybe a biscuit?" +File4_lowPref = "No, thanks" +File4_sha256 = hashlib.sha256 (File4.encode ('UTF-8')).hexdigest () + +File5 = "More Tea...?" +File5_lowPref = "I have to go..." +File5_sha256 = hashlib.sha256 (File5.encode ('UTF-8')).hexdigest () + +MetaXml = \ +""" + + + GNU Wget + + + GNU GPL + http://www.gnu.org/licenses/gpl.html + + Wget Test Files + 1.2.3 + Wget Test Files description + + + + {{FILE1_HASH}} + + + http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file + http://{{SRV_HOST}}:{{SRV_PORT}}/404 + http://{{SRV_HOST}}:{{SRV_PORT}}/File1_lowPref + http://{{SRV_HOST}}:{{SRV_PORT}}/File1 + + + + + {{FILE2_HASH}} + + + http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file + http://{{SRV_HOST}}:{{SRV_PORT}}/404 + http://{{SRV_HOST}}:{{SRV_PORT}}/File2_lowPref + http://{{SRV_HOST}}:{{SRV_PORT}}/File2 + + + + + {{FILE3_HASH}} + + + http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file + http://{{SRV_HOST}}:{{SRV_PORT}}/404 + http://{{SRV_HOST}}:{{SRV_PORT}}/File3_lowPref + http://{{SRV_HOST}}:{{SRV_PORT}}/File3 + + + + + {{FILE4_HASH}} + + + http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file + http://{{SRV_HOST}}:{{SRV_PORT}}/404 + http://{{SRV_HOST}}:{{SRV_PORT}}/File4_lowPref + http://{{SRV_HOST}}:{{SRV_PORT}}/File4 + + + + + {{FILE5_HASH}} + + + http://{{SRV_HOST}}:{{SRV_PORT}}/wrong_file + http://{{SRV_HOST}}:{{SRV_PORT}}/404 + http://{{SRV_HOST}}:{{SRV_PORT}}/File5_lowPref + http://{{SRV_HOST}}:{{SRV_PORT}}/File5 + + + + +""" + +# This will be filled as soon as we know server hostname and port +MetaHTTPRules = {'SendHeader' : {}} + +MetaHTTP = WgetFile ("main.metalink", rules=MetaHTTPRules) +MetaHTTP_down = WgetFile ("main.metalink.meta#1", MetaXml) + +wrong_file = WgetFile ("wrong_file", bad) + +File1_orig = WgetFile ("File1", File1) +File1_down = WgetFile ("main.metalink.meta#1.#1", File1) +File1_nono = WgetFile ("File1_lowPref", File1_lowPref) + +File2_orig = WgetFile ("File2", File2) +File2_down = WgetFile ("main.metalink.meta#1.#2", File2) +File2_nono = WgetFile ("File2_lowPref", File2_lowPref) + +# rejected by libmetalink +File3_orig = WgetFile ("File3", File3) +File3_nono = WgetFile ("File3_lowPref", File3_lowPref) + +File4_orig = WgetFile ("File4", File4) +File4_down = WgetFile ("main.metalink.meta#1.#3", File4) +File4_nono = WgetFile ("File4_lowPref", File4_lowPref) + +File5_orig = WgetFile ("File5", File5) +File5_down = WgetFile ("main.metalink.meta#1.#4", File5) +File5_nono = WgetFile ("File5_lowPref", File5_lowPref) + +WGET_OPTIONS = "--metalink-over-http --metalink-index=0" +WGET_URLS = [["main.metalink"]] + +RequestList = [[ + "HEAD /main.metalink", + "GET /main.metalink", + "GET /404", + "GET /wrong_file", + "GET /File1", + "GET /File2", + "GET /File4", + "GET /File5" +]] + +Files = [[ + MetaHTTP, + wrong_file, + File1_orig, File1_nono, + File2_orig, File2_nono, + File3_orig, File3_nono, + File4_orig, File4_nono, + File5_orig, File5_nono +]] +Existing_Files = [] + +ExpectedReturnCode = 0 +ExpectedDownloadedFiles = [ + MetaHTTP_down, + File1_down, + File2_down, + File4_down, + File5_down +] + +################ Pre and Post Test Hooks ##################################### +pre_test = { + "ServerFiles" : Files, + "LocalFiles" : Existing_Files +} +test_options = { + "WgetCommands" : WGET_OPTIONS, + "Urls" : WGET_URLS +} +post_test = { + "ExpectedFiles" : ExpectedDownloadedFiles, + "ExpectedRetcode" : ExpectedReturnCode, + "FilesCrawled" : RequestList +} + +http_test = HTTPTest ( + pre_hook=pre_test, + test_params=test_options, + post_hook=post_test +) + +http_test.server_setup() +### Get and use dynamic server sockname +srv_host, srv_port = http_test.servers[0].server_inst.socket.getsockname () + +MetaXml = MetaXml.replace('{{FILE1_HASH}}', File1_sha256) +MetaXml = MetaXml.replace('{{FILE2_HASH}}', File2_sha256) +MetaXml = MetaXml.replace('{{FILE3_HASH}}', File3_sha256) +MetaXml = MetaXml.replace('{{FILE4_HASH}}', File4_sha256) +MetaXml = MetaXml.replace('{{FILE5_HASH}}', File5_sha256) +MetaXml = MetaXml.replace('{{SRV_HOST}}', srv_host) +MetaXml = MetaXml.replace('{{SRV_PORT}}', str (srv_port)) +MetaHTTP_down.content = MetaXml + +MetaHTTP.content = MetaXml + +MetaHTTPRules["SendHeader"] = { + 'Content-Type': 'application/metalink4+xml', + 'Content-Disposition': 'filename="newname.metalink"' +} + +err = http_test.begin () + +exit (err)