Add tests for --convert-links option

* testenv/Makefile.am: Add two new tests, Test-k.py and Test-https-k.py
* testenv/Test-k.py: New file. Add a test based on tests/Test-k.px
* testenv/Test-https-k.py: New file. Add a new test to ensure that the
  protocol of the original host URL is retained when creating absolute
  links.

This test is added as a result of an issue reported on StackExchange:
https://superuser.com/questions/1348940/making-wgets-convert-links-respect-http-vs-https
This commit is contained in:
Darshit Shah 2024-02-19 18:22:57 +05:30
parent dfb9f65fc3
commit f4da5456c2
3 changed files with 186 additions and 1 deletions

View File

@ -44,7 +44,7 @@ DEFAULT_TESTS = \
Test-condget.py \
Test-Content-disposition-2.py \
Test-Content-disposition.py \
test_css_url.py \
test_css_url.py \
Test--convert-links--content-on-error.py \
Test-cookie-401.py \
Test-cookie-domain-mismatch.py \
@ -52,8 +52,10 @@ DEFAULT_TESTS = \
Test-cookie.py \
Test-Head.py \
Test-hsts.py \
Test-https-k.py \
Test--https.py \
Test--https-crl.py \
Test-k.py \
Test-missing-scheme-retval.py \
Test-O.py \
Test-pinnedpubkey-der-https.py \

98
testenv/Test-https-k.py Executable file
View File

@ -0,0 +1,98 @@
#!/usr/bin/env python3
import os
import platform
from sys import exit
from test.base_test import HTTPS, SKIP_TEST
from test.http_test import HTTPTest
from misc.wget_file import WgetFile
"""
Test that Wget handles the --convert-links (-k) option correctly.
Ensure that when downloading, Wget retains the protocol of the host URL
if the link wasn't followed and an absolute link is to be added to the
local file.
"""
# This test needs an HTTPS server, so it only runs when the SSL_TESTS
# environment variable is set; otherwise report the test as skipped.
if os.getenv('SSL_TESTS') is None:
    exit(SKIP_TEST)
############################## File Definitions ##############################
# Page served as index.html. It carries three kinds of link:
#   * sub.html       - relative link to a page that is also served
#   * missing.xhtml  - relative link matched by the --reject-regex in
#                      WGET_OPTIONS
#   * //localhost:{{port}}/missing2.html - scheme-relative link to a page that
#                      is not in the served file list
# NOTE(review): {{port}} is presumably replaced with the server's port by the
# test framework - confirm.
index = """
<html>
<head>
<title>Index</title>
</head>
<body>
<a href="sub.html">Site</a>
<a href="missing.xhtml">Missing</a>
<a href="//localhost:{{port}}/missing2.html">Missing2</a>
</body>
</html>
"""
# Expected contents of the local index.html after link conversion: the link to
# sub.html stays relative, while both links to pages that were not saved are
# expected to become absolute URLs using the https scheme.
converted = """
<html>
<head>
<title>Index</title>
</head>
<body>
<a href="sub.html">Site</a>
<a href="https://localhost:{{port}}/missing.xhtml">Missing</a>
<a href="https://localhost:{{port}}/missing2.html">Missing2</a>
</body>
</html>
"""
# Body of the linked page sub.html; it contains no links of its own.
site = """
<html>
<head>
<title>Site</title>
</head>
<body>
Subsite
</body>
</html>
"""
# Files as served (IndexPage, SubSite) and the expected converted copy of
# index.html (LocalIndexPage). Note both index objects share the same name.
IndexPage = WgetFile("index.html", index)
SubSite = WgetFile("sub.html", site)
LocalIndexPage = WgetFile("index.html", converted)
# Record the host platform in the test output.
print(platform.system())

# -k converts links in downloaded documents, -r recurses from index.html and
# -nH avoids creating a per-host directory. Links ending in .xhtml are rejected
# so that missing.xhtml is never fetched, and certificate verification is
# disabled for the HTTPS test server.
WGET_OPTIONS = "-k -r -nH --reject-regex '.*\\.xhtml' --no-check-certificate"
WGET_URLS = [["index.html"]]

# One inner list per server: a single HTTPS server with the two pages.
Files = [[IndexPage, SubSite]]
Servers = [HTTPS]

# NOTE(review): Wget documents exit status 8 as "server issued an error
# response"; presumably triggered by missing2.html, which is linked from
# index.html but not served - confirm.
ExpectedReturnCode = 8
ExpectedDownloadedFiles = [LocalIndexPage, SubSite]

########################### Pre and Post Test Hooks ##########################
pre_test = {
    "ServerFiles": Files,
}
test_options = {
    "WgetCommands": WGET_OPTIONS,
    "Urls": WGET_URLS,
}
post_test = {
    "ExpectedFiles": ExpectedDownloadedFiles,
    "ExpectedRetcode": ExpectedReturnCode,
}

# Run the test and propagate its result as the process exit status.
err = HTTPTest(
    pre_hook=pre_test,
    test_params=test_options,
    post_hook=post_test,
    protocols=Servers,
).begin()
exit(err)

85
testenv/Test-k.py Executable file
View File

@ -0,0 +1,85 @@
#!/usr/bin/env python3
import platform
from sys import exit
from test.http_test import HTTPTest
from misc.wget_file import WgetFile
"""
Test that Wget handles the --convert-links (-k) option correctly.
Also tests that the --restrict-file-names option works as expected by using a
filename with restricted characters and ensuring that it uses the correct
character set based on the current OS.
"""
############################## File Definitions ##############################
# Page served as index.html. Its only link targets "site;sub:.html", a file
# name containing both ';' and ':'.
index = """
<html>
<head>
<title>Index</title>
</head>
<body>
<a href="site;sub:.html">Site</a>
</body>
</html>
"""
# Expected contents of the local index.html after link conversion: the link
# gains a "./" prefix and the ';' appears percent-encoded as %3B, while the
# ':' is left as-is.
converted = """
<html>
<head>
<title>Index</title>
</head>
<body>
<a href="./site%3Bsub:.html">Site</a>
</body>
</html>
"""
# Body of the linked page; it contains no links of its own.
site = """
<html>
<head>
<title>Site</title>
</head>
<body>
Subsite
</body>
</html>
"""
# Files as served (IndexPage, SubSite) and the expected converted copy of
# index.html (LocalIndexPage). Note both index objects share the same name.
IndexPage = WgetFile("index.html", index)
SubSite = WgetFile("site;sub:.html", site)
LocalIndexPage = WgetFile("index.html", converted)
# Record the host platform in the test output.
print(platform.system())

# Choose the --restrict-file-names mode from the host OS: Linux and Darwin get
# "unix", every other platform gets "windows".
if platform.system() in ("Linux", "Darwin"):
    restrict = "unix"
else:
    restrict = "windows"

WGET_OPTIONS = "-k -r -nH --restrict-file-names=" + restrict
WGET_URLS = [["index.html"]]

# One inner list per server: a single server with the two pages.
Files = [[IndexPage, SubSite]]

ExpectedReturnCode = 0
ExpectedDownloadedFiles = [LocalIndexPage, SubSite]

########################### Pre and Post Test Hooks ##########################
pre_test = dict(ServerFiles=Files)
test_options = dict(WgetCommands=WGET_OPTIONS, Urls=WGET_URLS)
post_test = dict(
    ExpectedFiles=ExpectedDownloadedFiles,
    ExpectedRetcode=ExpectedReturnCode,
)

# Run the test and propagate its result as the process exit status.
http_test = HTTPTest(
    pre_hook=pre_test,
    test_params=test_options,
    post_hook=post_test,
)
err = http_test.begin()
exit(err)