From 4100339a2ba8b0c0fdf36558222f05c27aa7808a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20R=C3=BChsen?= <tim.ruehsen@gmx.de>
Date: Sat, 27 Jan 2024 19:50:13 +0100
Subject: [PATCH] Parse 'srcset' HTML attr for 'source' HTML tag.

* src/html-url.c (struct known_tag): Use tag_handle_img() for 'source' tag.
* testenv/Test-recursive-include.py: Extend test to cover 'srcset'
on 'source' and 'img' tags.
---
 src/html-url.c                    |  6 +--
 testenv/Test-recursive-include.py | 67 ++++++++++++++++++++-----------
 2 files changed, 47 insertions(+), 26 deletions(-)

diff --git a/src/html-url.c b/src/html-url.c
index 896d6fc4..d94899fb 100644
--- a/src/html-url.c
+++ b/src/html-url.c
@@ -105,7 +105,7 @@ static struct known_tag {
   { TAG_FORM,    "form",        tag_handle_form },
   { TAG_FRAME,   "frame",       tag_find_urls },
   { TAG_IFRAME,  "iframe",      tag_find_urls },
-  { TAG_IMG,     "img",         tag_handle_img },
+  { TAG_IMG,     "img",         tag_handle_img }, // tag_find_urls() plus handling "srcset"
   { TAG_INPUT,   "input",       tag_find_urls },
   { TAG_LAYER,   "layer",       tag_find_urls },
   { TAG_LINK,    "link",        tag_handle_link },
@@ -118,7 +118,7 @@ static struct known_tag {
   { TAG_TH,      "th",          tag_find_urls },
   { TAG_VIDEO,   "video",       tag_find_urls },
   { TAG_AUDIO,   "audio",       tag_find_urls },
-  { TAG_SOURCE,  "source",      tag_find_urls }
+  { TAG_SOURCE,  "source",      tag_handle_img } // tag_find_urls() plus handling "srcset"
 };
 
 /* tag_url_attributes documents which attributes of which tags contain
@@ -170,7 +170,7 @@ static struct {
   { TAG_VIDEO,          "poster",       ATTR_INLINE },
   { TAG_AUDIO,          "src",          ATTR_INLINE },
   { TAG_AUDIO,          "poster",       ATTR_INLINE },
-  { TAG_SOURCE,         "src",          ATTR_INLINE }
+  { TAG_SOURCE,         "src",          ATTR_INLINE },
 };
 
 /* The lists of interesting tags and attributes are built dynamically,
diff --git a/testenv/Test-recursive-include.py b/testenv/Test-recursive-include.py
index 1fe33cd6..d21b305c 100755
--- a/testenv/Test-recursive-include.py
+++ b/testenv/Test-recursive-include.py
@@ -1,56 +1,77 @@
 #!/usr/bin/env python3
 from sys import exit
 from test.http_test import HTTPTest
-from test.base_test import HTTP, HTTPS
+from test.base_test import HTTP
 from misc.wget_file import WgetFile
 
 """
     Basic test of --recursive.
 """
-############# File Definitions ###############################################
+# File Definitions ###############################################
 File1 = """<html><body>
 <a href=\"/a/File2.html\">text</a>
 <a href=\"/b/File3.html\">text</a>
+<picture>
+  Hey, a source <source type=\"image/svg+xml\" lolli=\"xxx\" srcset=\"/a/logo.svg\"/>.
+  Hey, a srcset <img src=\"/a/picture.png\" srcset=\"/a/picture1.png, /a/picture2.png 150w,/a/picture3.png 100x\"/>.
+</picture>
 </body></html>"""
 File2 = "With lemon or cream?"
 File3 = "Surely you're joking Mr. Feynman"
+File4 = "logosvg"
+File5 = "picturepng"
+File6 = "picture1png"
+File7 = "picture2png"
+File8 = "picture3png"
 
-File1_File = WgetFile ("a/File1.html", File1)
-File2_File = WgetFile ("a/File2.html", File2)
-File3_File = WgetFile ("b/File3.html", File3)
+File1_File = WgetFile("a/File1.html", File1)
+File2_File = WgetFile("a/File2.html", File2)
+File3_File = WgetFile("b/File3.html", File3)
+File4_File = WgetFile("a/logo.svg", File4)
+File5_File = WgetFile("a/picture.png", File5)
+File6_File = WgetFile("a/picture1.png", File6)
+File7_File = WgetFile("a/picture2.png", File7)
+File8_File = WgetFile("a/picture3.png", File8)
 
 WGET_OPTIONS = "--recursive --no-host-directories --include-directories=a"
 WGET_URLS = [["a/File1.html"]]
 
 Servers = [HTTP]
 
-Files = [[File1_File, File2_File, File3_File]]
+Files = [[File1_File, File2_File, File3_File, File4_File, File5_File, File6_File, File7_File, File8_File]]
 Existing_Files = []
 
 ExpectedReturnCode = 0
-ExpectedDownloadedFiles = [File1_File, File2_File]
-Request_List = [["GET /a/File1.html",
-                 "GET /a/File2.html"]]
+ExpectedDownloadedFiles = [File1_File, File2_File, File4_File, File5_File, File6_File, File7_File, File8_File]
+Request_List = [[
+    "GET /a/File1.html",
+    "GET /a/File2.html",
+    "GET /a/logo.svg",
+    "GET /a/picture.png",
+    "GET /a/picture1.png",
+    "GET /a/picture2.png",
+    "GET /a/picture3.png",
+]]
 
-################ Pre and Post Test Hooks #####################################
+# Pre and Post Test Hooks #####################################
 pre_test = {
-    "ServerFiles"       : Files,
-    "LocalFiles"        : Existing_Files
+    "ServerFiles": Files,
+    "LocalFiles": Existing_Files
 }
 test_options = {
-    "WgetCommands"      : WGET_OPTIONS,
-    "Urls"              : WGET_URLS
+    "WgetCommands": WGET_OPTIONS,
+    "Urls": WGET_URLS
 }
 post_test = {
-    "ExpectedFiles"     : ExpectedDownloadedFiles,
-    "ExpectedRetcode"   : ExpectedReturnCode
+    "ExpectedFiles": ExpectedDownloadedFiles,
+    "ExpectedRetcode": ExpectedReturnCode
 }
 
-err = HTTPTest (
-                pre_hook=pre_test,
-                test_params=test_options,
-                post_hook=post_test,
-                protocols=Servers
-).begin ()
+err = HTTPTest(
+    pre_hook=pre_test,
+    test_params=test_options,
+    post_hook=post_test,
+    protocols=Servers
+).begin()
 
-exit (err)
+exit(err)