#!/usr/bin/env python3
"""Run Wget recursively with ``--rejected-log`` and check the log it writes.

The server offers three HTML pages, a robots.txt and a dummy file.  Starting
from index.html the pages link to one another so that three links are
expected to show up in the rejected log:

* secondpage.html links back to index.html       -> BLACKLIST row
* thirdpage.html links to /dummy.txt, which
  robots.txt disallows                           -> ROBOTS row
* thirdpage.html links to http://no.such.domain/ -> SPANNEDHOST row

``{{port}}`` is a placeholder; presumably the test harness replaces it with
the port of the test HTTP server before serving/comparing the files.
"""
from sys import exit

from test.http_test import HTTPTest
from misc.wget_file import WgetFile

############# File Definitions ###############################################
# NOTE(review): the link targets below are chosen to match the U_URL / P_URL
# columns of the expected log further down; keep the two in sync.
mainpage = """
<html>
<head>
  <title>Main Page</title>
</head>
<body>
  <p>
    Recurse to a <a href="http://127.0.0.1:{{port}}/secondpage.html">second page</a>.
  </p>
</body>
</html>
"""

secondpage = """
<html>
<head>
  <title>Second Page</title>
</head>
<body>
  <p>
    Recurse to a <a href="http://127.0.0.1:{{port}}/thirdpage.html">third page</a>.
    Try the blacklisted <a href="http://127.0.0.1:{{port}}/index.html">main page</a>.
  </p>
</body>
</html>
"""

thirdpage = """
<html>
<head>
  <title>Third Page</title>
</head>
<body>
  <p>
    Try a hidden <a href="http://127.0.0.1:{{port}}/dummy.txt">dummy file</a>.
    Try to leave to <a href="http://no.such.domain/">another domain</a>.
  </p>
</body>
</html>
"""

# Disallows the dummy file, which is what the ROBOTS row in the log refers to.
robots = """
User-agent: *
Disallow: /dummy.txt
"""

# Expected contents of log.csv: tab-separated, one header row followed by one
# row per rejected URL (U_* = the rejected URL, P_* = the page it was found
# on).  The backslash after the opening quotes keeps the header on the first
# line of the file.  Every data row ends in three tabs (empty P_PARAMS,
# P_QUERY and P_FRAGMENT) -- do not strip the trailing whitespace.
log = """\
REASON\tU_URL\tU_SCHEME\tU_HOST\tU_PORT\tU_PATH\tU_PARAMS\tU_QUERY\tU_FRAGMENT\tP_URL\tP_SCHEME\tP_HOST\tP_PORT\tP_PATH\tP_PARAMS\tP_QUERY\tP_FRAGMENT
BLACKLIST\thttp%3A//127.0.0.1%3A{{port}}/index.html\tSCHEME_HTTP\t127.0.0.1\t{{port}}\tindex.html\t\t\t\thttp%3A//127.0.0.1%3A{{port}}/secondpage.html\tSCHEME_HTTP\t127.0.0.1\t{{port}}\tsecondpage.html\t\t\t
ROBOTS\thttp%3A//127.0.0.1%3A{{port}}/dummy.txt\tSCHEME_HTTP\t127.0.0.1\t{{port}}\tdummy.txt\t\t\t\thttp%3A//127.0.0.1%3A{{port}}/thirdpage.html\tSCHEME_HTTP\t127.0.0.1\t{{port}}\tthirdpage.html\t\t\t
SPANNEDHOST\thttp%3A//no.such.domain/\tSCHEME_HTTP\tno.such.domain\t80\t\t\t\t\thttp%3A//127.0.0.1%3A{{port}}/thirdpage.html\tSCHEME_HTTP\t127.0.0.1\t{{port}}\tthirdpage.html\t\t\t
"""

# Served by the test server but expected never to be downloaded.
dummyfile = "Don't care."


index_html = WgetFile("index.html", mainpage)
secondpage_html = WgetFile("secondpage.html", secondpage)
thirdpage_html = WgetFile("thirdpage.html", thirdpage)
robots_txt = WgetFile("robots.txt", robots)
dummy_txt = WgetFile("dummy.txt", dummyfile)
log_csv = WgetFile("log.csv", log)

# -nd: no directory hierarchy, -r: recursive, --rejected-log: file to which
# the rejection decisions are written.
WGET_OPTIONS = "-nd -r --rejected-log log.csv"
WGET_URLS = [["index.html"]]

# Files made available on the server (one inner list per server).
Files = [[index_html, secondpage_html, thirdpage_html, robots_txt, dummy_txt]]

ExpectedReturnCode = 0
# dummy.txt is deliberately absent; log.csv is produced by Wget itself.
ExpectedDownloadedFiles = [
    index_html,
    secondpage_html,
    thirdpage_html,
    robots_txt,
    log_csv,
]

################ Pre and Post Test Hooks #####################################
pre_test = {
    "ServerFiles": Files,
}
test_options = {
    "WgetCommands": WGET_OPTIONS,
    "Urls": WGET_URLS,
}
post_test = {
    "ExpectedFiles": ExpectedDownloadedFiles,
    "ExpectedRetcode": ExpectedReturnCode,
}

# Run the test and hand its result to the caller as the process exit status.
err = HTTPTest(
    pre_hook=pre_test,
    test_params=test_options,
    post_hook=post_test,
).begin()

exit(err)