From e863a6323b88d2cfddffc64aba84e0d9af7ba921 Mon Sep 17 00:00:00 2001
From: abbotti <devnull@localhost>
Date: Thu, 16 May 2002 10:22:24 -0700
Subject: [PATCH] [svn] New function schemes_are_similar_p to test enumerated
 scheme codes for similarity (SCHEME_HTTP and SCHEME_HTTPS are similar).  Use
 it in recur.c (download_child_p).  Fixes a bug that allowed hosts to be
 spanned without the -H option when the child scheme differed from the
 parent scheme (http vs. https).  Published in
 <agn4eu8apduek7magfu9bfe63gto8i7cdh@farscape.privy.mev.co.uk>.

---
 src/ChangeLog | 12 ++++++++++++
 src/recur.c   | 28 +++++++++-------------------
 src/url.c     | 18 ++++++++++++++++++
 src/url.h     |  2 ++
 4 files changed, 41 insertions(+), 19 deletions(-)

diff --git a/src/ChangeLog b/src/ChangeLog
index 95009b85..8d309055 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,15 @@
+2002-05-16  Ian Abbott  <abbotti@mev.co.uk>
+
+	* url.c (schemes_are_similar_p): New function to test enumerated
+	scheme codes for similarity.
+
+	* url.h: Declare it.
+
+	* recur.c (download_child_p): Use it to compare schemes.  This
+	also fixes a bug that allows hosts to be spanned (without the
+	-H option) when the parent scheme is https and the child's is
+	http or vice versa.
+
 2002-05-14  Bill Richardson  <bill@riverstonenet.com>
 
 	* ftp.c (getftp): Don't ftruncate stdout.
diff --git a/src/recur.c b/src/recur.c
index a77ff391..7339c365 100644
--- a/src/recur.c
+++ b/src/recur.c
@@ -415,6 +415,7 @@ download_child_p (const struct urlpos *upos, struct url *parent, int depth,
 {
   struct url *u = upos->url;
   const char *url = u->url;
+  int u_scheme_like_http;
 
   DEBUGP (("Deciding whether to enqueue \"%s\".\n", url));
 
@@ -445,12 +446,11 @@ download_child_p (const struct urlpos *upos, struct url *parent, int depth,
      More time- and memory- consuming tests should be put later on
      the list.  */
 
+  /* Determine whether URL under consideration has a HTTP-like scheme. */
+  u_scheme_like_http = schemes_are_similar_p (u->scheme, SCHEME_HTTP);
+
   /* 1. Schemes other than HTTP are normally not recursed into. */
-  if (u->scheme != SCHEME_HTTP
-#ifdef HAVE_SSL
-      && u->scheme != SCHEME_HTTPS
-#endif
-      && !(u->scheme == SCHEME_FTP && opt.follow_ftp))
+  if (!u_scheme_like_http && !(u->scheme == SCHEME_FTP && opt.follow_ftp))
     {
       DEBUGP (("Not following non-HTTP schemes.\n"));
       goto out;
@@ -458,11 +458,7 @@ download_child_p (const struct urlpos *upos, struct url *parent, int depth,
 
   /* 2. If it is an absolute link and they are not followed, throw it
      out.  */
-  if (u->scheme == SCHEME_HTTP
-#ifdef HAVE_SSL
-      || u->scheme == SCHEME_HTTPS
-#endif
-      )
+  if (schemes_are_similar_p (u->scheme, SCHEME_HTTP))
     if (opt.relative_only && !upos->link_relative_p)
       {
 	DEBUGP (("It doesn't really look like a relative link.\n"));
@@ -483,7 +479,7 @@ download_child_p (const struct urlpos *upos, struct url *parent, int depth,
      opt.no_parent.  Also ignore it for documents needed to display
      the parent page when in -p mode.  */
   if (opt.no_parent
-      && u->scheme == start_url_parsed->scheme
+      && schemes_are_similar_p (u->scheme, start_url_parsed->scheme)
       && 0 == strcasecmp (u->host, start_url_parsed->host)
       && u->port == start_url_parsed->port
       && !(opt.page_requisites && upos->link_inline_p))
@@ -526,7 +522,7 @@ download_child_p (const struct urlpos *upos, struct url *parent, int depth,
     }
 
   /* 7. */
-  if (u->scheme == parent->scheme)
+  if (schemes_are_similar_p (u->scheme, parent->scheme))
     if (!opt.spanhost && 0 != strcasecmp (parent->host, u->host))
       {
 	DEBUGP (("This is not the same hostname as the parent's (%s and %s).\n",
@@ -535,13 +531,7 @@ download_child_p (const struct urlpos *upos, struct url *parent, int depth,
       }
 
   /* 8. */
-  if (opt.use_robots
-      && (u->scheme == SCHEME_HTTP
-#ifdef HAVE_SSL
-	  || u->scheme == SCHEME_HTTPS
-#endif
-	  )
-      )
+  if (opt.use_robots && u_scheme_like_http)
     {
       struct robot_specs *specs = res_get_specs (u->host, u->port);
       if (!specs)
diff --git a/src/url.c b/src/url.c
index 6bcaa39a..f68ee610 100644
--- a/src/url.c
+++ b/src/url.c
@@ -2472,6 +2472,24 @@ downloaded_files_free (void)
       downloaded_files_hash = NULL;
     }
 }
+
+/* Return 1 if scheme A is similar to scheme B, 0 otherwise.
+
+   Schemes are similar if they are equal.  If SSL is supported (HAVE_SSL
+   is defined), schemes are also similar if one is http (SCHEME_HTTP)
+   and the other is https (SCHEME_HTTPS), in either argument order.  */
+int
+schemes_are_similar_p (enum url_scheme a, enum url_scheme b)
+{
+  if (a == b)
+    return 1;
+#ifdef HAVE_SSL
+  if ((a == SCHEME_HTTP && b == SCHEME_HTTPS)
+      || (a == SCHEME_HTTPS && b == SCHEME_HTTP))
+    return 1;
+#endif /* HAVE_SSL */
+  return 0;
+}
 
 #if 0
 /* Debugging and testing support for path_simplify. */
diff --git a/src/url.h b/src/url.h
index 79f23814..bd482633 100644
--- a/src/url.h
+++ b/src/url.h
@@ -158,4 +158,6 @@ downloaded_file_t downloaded_file PARAMS ((downloaded_file_t, const char *));
 
 char *rewrite_shorthand_url PARAMS ((const char *));
 
+int schemes_are_similar_p PARAMS ((enum url_scheme a, enum url_scheme b));
+
 #endif /* URL_H */