Parse 'srcset' HTML attr for 'source' HTML tag.

* src/html-url.c (struct known_tag): Use tag_handle_img() for 'source' tag.
* testenv/Test-recursive-include.py: Extend test.
This commit is contained in:
Tim Rühsen 2024-01-27 19:50:13 +01:00
parent bedeb7dc27
commit 4100339a2b
2 changed files with 47 additions and 26 deletions

View File

@ -105,7 +105,7 @@ static struct known_tag {
{ TAG_FORM, "form", tag_handle_form },
{ TAG_FRAME, "frame", tag_find_urls },
{ TAG_IFRAME, "iframe", tag_find_urls },
{ TAG_IMG, "img", tag_handle_img },
{ TAG_IMG, "img", tag_handle_img }, // tag_find_urls() plus handling "srcset"
{ TAG_INPUT, "input", tag_find_urls },
{ TAG_LAYER, "layer", tag_find_urls },
{ TAG_LINK, "link", tag_handle_link },
@ -118,7 +118,7 @@ static struct known_tag {
{ TAG_TH, "th", tag_find_urls },
{ TAG_VIDEO, "video", tag_find_urls },
{ TAG_AUDIO, "audio", tag_find_urls },
{ TAG_SOURCE, "source", tag_find_urls }
{ TAG_SOURCE, "source", tag_handle_img } // tag_find_urls() plus handling "srcset"
};
/* tag_url_attributes documents which attributes of which tags contain
@ -170,7 +170,7 @@ static struct {
{ TAG_VIDEO, "poster", ATTR_INLINE },
{ TAG_AUDIO, "src", ATTR_INLINE },
{ TAG_AUDIO, "poster", ATTR_INLINE },
{ TAG_SOURCE, "src", ATTR_INLINE }
{ TAG_SOURCE, "src", ATTR_INLINE },
};
/* The lists of interesting tags and attributes are built dynamically,

View File

@ -1,56 +1,77 @@
#!/usr/bin/env python3
from sys import exit
from test.http_test import HTTPTest
from test.base_test import HTTP, HTTPS
from test.base_test import HTTP
from misc.wget_file import WgetFile
# NOTE(review): this text appears to be a rendered diff, so several statements
# show up twice (old and new spelling side by side, e.g. the section banners,
# the WgetFile(...) calls and the hook dictionaries) -- confirm against the
# real file before running it.
"""
Basic test of --recursive.
"""
############# File Definitions ###############################################
# File Definitions ###############################################
# File1 is the HTML page named in WGET_URLS. It links to /a/File2.html and
# /b/File3.html and references images in two ways: a <source> tag with a
# srcset attribute, and an <img> tag with both src and srcset. The <img>
# srcset lists three candidates: one bare URL, one followed by "150w" and
# one followed by "100x".
File1 = """<html><body>
<a href=\"/a/File2.html\">text</a>
<a href=\"/b/File3.html\">text</a>
<picture>
Hey, a source <source type=\"image/svg+xml\" lolli=\"xxx\" srcset=\"/a/logo.svg\"/>.
Hey, a srcset <img src=\"/a/picture.png\" srcset=\"/a/picture1.png, /a/picture2.png 150w,/a/picture3.png 100x\"/>.
</picture>
</body></html>"""
# Bodies for the other files; File4..File8 correspond to the image URLs
# referenced from File1 (see the WgetFile definitions below).
File2 = "With lemon or cream?"
File3 = "Surely you're joking Mr. Feynman"
File4 = "logosvg"
File5 = "picturepng"
File6 = "picture1png"
File7 = "picture2png"
File8 = "picture3png"
# Pair every body with a path. Presumably WgetFile(path, content) -- confirm
# against misc.wget_file.
File1_File = WgetFile ("a/File1.html", File1)
File2_File = WgetFile ("a/File2.html", File2)
File3_File = WgetFile ("b/File3.html", File3)
File1_File = WgetFile("a/File1.html", File1)
File2_File = WgetFile("a/File2.html", File2)
File3_File = WgetFile("b/File3.html", File3)
File4_File = WgetFile("a/logo.svg", File4)
File5_File = WgetFile("a/picture.png", File5)
File6_File = WgetFile("a/picture1.png", File6)
File7_File = WgetFile("a/picture2.png", File7)
File8_File = WgetFile("a/picture3.png", File8)
# The crawl starts at a/File1.html and is limited with --include-directories=a.
# b/File3.html is therefore part of Files (served) but is absent from both
# ExpectedDownloadedFiles and Request_List.
WGET_OPTIONS = "--recursive --no-host-directories --include-directories=a"
WGET_URLS = [["a/File1.html"]]
Servers = [HTTP]
Files = [[File1_File, File2_File, File3_File]]
Files = [[File1_File, File2_File, File3_File, File4_File, File5_File, File6_File, File7_File, File8_File]]
Existing_Files = []
ExpectedReturnCode = 0
ExpectedDownloadedFiles = [File1_File, File2_File]
Request_List = [["GET /a/File1.html",
"GET /a/File2.html"]]
ExpectedDownloadedFiles = [File1_File, File2_File, File4_File, File5_File, File6_File, File7_File, File8_File]
# NOTE(review): Request_List is assigned here but is not referenced by any of
# the hook dictionaries below -- confirm whether that is intentional.
Request_List = [[
"GET /a/File1.html",
"GET /a/File2.html",
"GET /a/logo.svg",
"GET /a/picture.png",
"GET /a/picture1.png",
"GET /a/picture2.png",
"GET /a/picture3.png",
]]
################ Pre and Post Test Hooks #####################################
# Pre and Post Test Hooks #####################################
# These three dictionaries are passed to HTTPTest below as pre_hook,
# test_params and post_hook respectively.
pre_test = {
"ServerFiles" : Files,
"LocalFiles" : Existing_Files
"ServerFiles": Files,
"LocalFiles": Existing_Files
}
test_options = {
"WgetCommands" : WGET_OPTIONS,
"Urls" : WGET_URLS
"WgetCommands": WGET_OPTIONS,
"Urls": WGET_URLS
}
post_test = {
"ExpectedFiles" : ExpectedDownloadedFiles,
"ExpectedRetcode" : ExpectedReturnCode
"ExpectedFiles": ExpectedDownloadedFiles,
"ExpectedRetcode": ExpectedReturnCode
}
# Build the test, run it via begin(), and use the returned value as the
# process exit status.
err = HTTPTest (
pre_hook=pre_test,
test_params=test_options,
post_hook=post_test,
protocols=Servers
).begin ()
err = HTTPTest(
pre_hook=pre_test,
test_params=test_options,
post_hook=post_test,
protocols=Servers
).begin()
exit (err)
exit(err)