Parse 'srcset' HTML attr for 'source' HTML tag.

* src/html-url.c (struct known_tag): Use tag_handle_img() for 'source' tag.
* testenv/Test-recursive-include.py: Extend test to cover 'srcset' on 'source' and 'img' tags.
This commit is contained in:
Tim Rühsen 2024-01-27 19:50:13 +01:00
parent bedeb7dc27
commit 4100339a2b
2 changed files with 47 additions and 26 deletions

View File

@ -105,7 +105,7 @@ static struct known_tag {
{ TAG_FORM, "form", tag_handle_form }, { TAG_FORM, "form", tag_handle_form },
{ TAG_FRAME, "frame", tag_find_urls }, { TAG_FRAME, "frame", tag_find_urls },
{ TAG_IFRAME, "iframe", tag_find_urls }, { TAG_IFRAME, "iframe", tag_find_urls },
{ TAG_IMG, "img", tag_handle_img }, { TAG_IMG, "img", tag_handle_img }, // tag_find_urls() plus handling "srcset"
{ TAG_INPUT, "input", tag_find_urls }, { TAG_INPUT, "input", tag_find_urls },
{ TAG_LAYER, "layer", tag_find_urls }, { TAG_LAYER, "layer", tag_find_urls },
{ TAG_LINK, "link", tag_handle_link }, { TAG_LINK, "link", tag_handle_link },
@ -118,7 +118,7 @@ static struct known_tag {
{ TAG_TH, "th", tag_find_urls }, { TAG_TH, "th", tag_find_urls },
{ TAG_VIDEO, "video", tag_find_urls }, { TAG_VIDEO, "video", tag_find_urls },
{ TAG_AUDIO, "audio", tag_find_urls }, { TAG_AUDIO, "audio", tag_find_urls },
{ TAG_SOURCE, "source", tag_find_urls } { TAG_SOURCE, "source", tag_handle_img } // tag_find_urls() plus handling "srcset"
}; };
/* tag_url_attributes documents which attributes of which tags contain /* tag_url_attributes documents which attributes of which tags contain
@ -170,7 +170,7 @@ static struct {
{ TAG_VIDEO, "poster", ATTR_INLINE }, { TAG_VIDEO, "poster", ATTR_INLINE },
{ TAG_AUDIO, "src", ATTR_INLINE }, { TAG_AUDIO, "src", ATTR_INLINE },
{ TAG_AUDIO, "poster", ATTR_INLINE }, { TAG_AUDIO, "poster", ATTR_INLINE },
{ TAG_SOURCE, "src", ATTR_INLINE } { TAG_SOURCE, "src", ATTR_INLINE },
}; };
/* The lists of interesting tags and attributes are built dynamically, /* The lists of interesting tags and attributes are built dynamically,

View File

@ -1,56 +1,77 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from sys import exit from sys import exit
from test.http_test import HTTPTest from test.http_test import HTTPTest
from test.base_test import HTTP, HTTPS from test.base_test import HTTP
from misc.wget_file import WgetFile from misc.wget_file import WgetFile
""" """
Basic test of --recursive. Basic test of --recursive.
""" """
############# File Definitions ############################################### # File Definitions ###############################################
File1 = """<html><body> File1 = """<html><body>
<a href=\"/a/File2.html\">text</a> <a href=\"/a/File2.html\">text</a>
<a href=\"/b/File3.html\">text</a> <a href=\"/b/File3.html\">text</a>
<picture>
Hey, a source <source type=\"image/svg+xml\" lolli=\"xxx\" srcset=\"/a/logo.svg\"/>.
Hey, a srcset <img src=\"/a/picture.png\" srcset=\"/a/picture1.png, /a/picture2.png 150w,/a/picture3.png 100x\"/>.
</picture>
</body></html>""" </body></html>"""
File2 = "With lemon or cream?" File2 = "With lemon or cream?"
File3 = "Surely you're joking Mr. Feynman" File3 = "Surely you're joking Mr. Feynman"
File4 = "logosvg"
File5 = "picturepng"
File6 = "picture1png"
File7 = "picture2png"
File8 = "picture3png"
File1_File = WgetFile ("a/File1.html", File1) File1_File = WgetFile("a/File1.html", File1)
File2_File = WgetFile ("a/File2.html", File2) File2_File = WgetFile("a/File2.html", File2)
File3_File = WgetFile ("b/File3.html", File3) File3_File = WgetFile("b/File3.html", File3)
File4_File = WgetFile("a/logo.svg", File4)
File5_File = WgetFile("a/picture.png", File5)
File6_File = WgetFile("a/picture1.png", File6)
File7_File = WgetFile("a/picture2.png", File7)
File8_File = WgetFile("a/picture3.png", File8)
WGET_OPTIONS = "--recursive --no-host-directories --include-directories=a" WGET_OPTIONS = "--recursive --no-host-directories --include-directories=a"
WGET_URLS = [["a/File1.html"]] WGET_URLS = [["a/File1.html"]]
Servers = [HTTP] Servers = [HTTP]
Files = [[File1_File, File2_File, File3_File]] Files = [[File1_File, File2_File, File3_File, File4_File, File5_File, File6_File, File7_File, File8_File]]
Existing_Files = [] Existing_Files = []
ExpectedReturnCode = 0 ExpectedReturnCode = 0
ExpectedDownloadedFiles = [File1_File, File2_File] ExpectedDownloadedFiles = [File1_File, File2_File, File4_File, File5_File, File6_File, File7_File, File8_File]
Request_List = [["GET /a/File1.html", Request_List = [[
"GET /a/File2.html"]] "GET /a/File1.html",
"GET /a/File2.html",
"GET /a/logo.svg",
"GET /a/picture.png",
"GET /a/picture1.png",
"GET /a/picture2.png",
"GET /a/picture3.png",
]]
################ Pre and Post Test Hooks ##################################### # Pre and Post Test Hooks #####################################
pre_test = { pre_test = {
"ServerFiles" : Files, "ServerFiles": Files,
"LocalFiles" : Existing_Files "LocalFiles": Existing_Files
} }
test_options = { test_options = {
"WgetCommands" : WGET_OPTIONS, "WgetCommands": WGET_OPTIONS,
"Urls" : WGET_URLS "Urls": WGET_URLS
} }
post_test = { post_test = {
"ExpectedFiles" : ExpectedDownloadedFiles, "ExpectedFiles": ExpectedDownloadedFiles,
"ExpectedRetcode" : ExpectedReturnCode "ExpectedRetcode": ExpectedReturnCode
} }
err = HTTPTest ( err = HTTPTest(
pre_hook=pre_test, pre_hook=pre_test,
test_params=test_options, test_params=test_options,
post_hook=post_test, post_hook=post_test,
protocols=Servers protocols=Servers
).begin () ).begin()
exit (err) exit(err)