From 4100339a2ba8b0c0fdf36558222f05c27aa7808a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20R=C3=BChsen?= <tim.ruehsen@gmx.de> Date: Sat, 27 Jan 2024 19:50:13 +0100 Subject: [PATCH] Parse 'srcset' HTML attr for 'source' HTML tag. * src/html-url.c (struct known_tag): Use tag_handle_img() for 'source' tag. * testenv/Test-recursive-include.py: Extend test. --- src/html-url.c | 6 +-- testenv/Test-recursive-include.py | 67 ++++++++++++++++++++----------- 2 files changed, 47 insertions(+), 26 deletions(-) diff --git a/src/html-url.c b/src/html-url.c index 896d6fc4..d94899fb 100644 --- a/src/html-url.c +++ b/src/html-url.c @@ -105,7 +105,7 @@ static struct known_tag { { TAG_FORM, "form", tag_handle_form }, { TAG_FRAME, "frame", tag_find_urls }, { TAG_IFRAME, "iframe", tag_find_urls }, - { TAG_IMG, "img", tag_handle_img }, + { TAG_IMG, "img", tag_handle_img }, // tag_find_urls() plus handling "srcset" { TAG_INPUT, "input", tag_find_urls }, { TAG_LAYER, "layer", tag_find_urls }, { TAG_LINK, "link", tag_handle_link }, @@ -118,7 +118,7 @@ static struct known_tag { { TAG_TH, "th", tag_find_urls }, { TAG_VIDEO, "video", tag_find_urls }, { TAG_AUDIO, "audio", tag_find_urls }, - { TAG_SOURCE, "source", tag_find_urls } + { TAG_SOURCE, "source", tag_handle_img } // tag_find_urls() plus handling "srcset" }; /* tag_url_attributes documents which attributes of which tags contain @@ -170,7 +170,7 @@ static struct { { TAG_VIDEO, "poster", ATTR_INLINE }, { TAG_AUDIO, "src", ATTR_INLINE }, { TAG_AUDIO, "poster", ATTR_INLINE }, - { TAG_SOURCE, "src", ATTR_INLINE } + { TAG_SOURCE, "src", ATTR_INLINE }, }; /* The lists of interesting tags and attributes are built dynamically, diff --git a/testenv/Test-recursive-include.py b/testenv/Test-recursive-include.py index 1fe33cd6..d21b305c 100755 --- a/testenv/Test-recursive-include.py +++ b/testenv/Test-recursive-include.py @@ -1,56 +1,77 @@ #!/usr/bin/env python3 from sys import exit from test.http_test import HTTPTest -from test.base_test import HTTP, HTTPS +from test.base_test import HTTP from misc.wget_file import WgetFile """ Basic test of --recursive. """ -############# File Definitions ############################################### +# File Definitions ############################################### File1 = """<html><body> <a href=\"/a/File2.html\">text</a> <a href=\"/b/File3.html\">text</a> +<picture> + Hey, a source <source type=\"image/svg+xml\" lolli=\"xxx\" srcset=\"/a/logo.svg\"/>. + Hey, a srcset <img src=\"/a/picture.png\" srcset=\"/a/picture1.png, /a/picture2.png 150w,/a/picture3.png 100x\"/>. +</picture> </body></html>""" File2 = "With lemon or cream?" File3 = "Surely you're joking Mr. Feynman" +File4 = "logosvg" +File5 = "picturepng" +File6 = "picture1png" +File7 = "picture2png" +File8 = "picture3png" -File1_File = WgetFile ("a/File1.html", File1) -File2_File = WgetFile ("a/File2.html", File2) -File3_File = WgetFile ("b/File3.html", File3) +File1_File = WgetFile("a/File1.html", File1) +File2_File = WgetFile("a/File2.html", File2) +File3_File = WgetFile("b/File3.html", File3) +File4_File = WgetFile("a/logo.svg", File4) +File5_File = WgetFile("a/picture.png", File5) +File6_File = WgetFile("a/picture1.png", File6) +File7_File = WgetFile("a/picture2.png", File7) +File8_File = WgetFile("a/picture3.png", File8) WGET_OPTIONS = "--recursive --no-host-directories --include-directories=a" WGET_URLS = [["a/File1.html"]] Servers = [HTTP] -Files = [[File1_File, File2_File, File3_File]] +Files = [[File1_File, File2_File, File3_File, File4_File, File5_File, File6_File, File7_File, File8_File]] Existing_Files = [] ExpectedReturnCode = 0 -ExpectedDownloadedFiles = [File1_File, File2_File] -Request_List = [["GET /a/File1.html", - "GET /a/File2.html"]] +ExpectedDownloadedFiles = [File1_File, File2_File, File4_File, File5_File, File6_File, File7_File, File8_File] +Request_List = [[ + "GET /a/File1.html", + "GET /a/File2.html", + "GET /a/logo.svg", + "GET /a/picture.png", + "GET /a/picture1.png", + "GET /a/picture2.png", + "GET /a/picture3.png", +]] -################ Pre and Post Test Hooks ##################################### +# Pre and Post Test Hooks ##################################### pre_test = { - "ServerFiles" : Files, - "LocalFiles" : Existing_Files + "ServerFiles": Files, + "LocalFiles": Existing_Files } test_options = { - "WgetCommands" : WGET_OPTIONS, - "Urls" : WGET_URLS + "WgetCommands": WGET_OPTIONS, + "Urls": WGET_URLS } post_test = { - "ExpectedFiles" : ExpectedDownloadedFiles, - "ExpectedRetcode" : ExpectedReturnCode + "ExpectedFiles": ExpectedDownloadedFiles, + "ExpectedRetcode": ExpectedReturnCode } -err = HTTPTest ( - pre_hook=pre_test, - test_params=test_options, - post_hook=post_test, - protocols=Servers -).begin () +err = HTTPTest( + pre_hook=pre_test, + test_params=test_options, + post_hook=post_test, + protocols=Servers +).begin() -exit (err) +exit(err)