From cf994330a37148db5b627b4cca74f098d5887cdd Mon Sep 17 00:00:00 2001
From: hniksic <devnull@localhost>
Date: Sun, 24 Apr 2005 13:00:19 -0700
Subject: [PATCH] [svn] Added reordering of addresses to try IPv4 first and the
 associated --prefer-family switch.

---
 doc/ChangeLog |  4 +++
 doc/wget.texi | 33 ++++++++++++++++++++++++
 src/ChangeLog | 15 +++++++++++
 src/host.c    | 70 ++++++++++++++++++++++++++++++++++++++++-----------
 src/init.c    | 28 +++++++++++++++++++++
 src/main.c    |  4 +++
 src/options.h |  6 +++++
 src/utils.c   | 46 +++++++++++++++++++++++++++++++++
 src/utils.h   |  3 +++
 9 files changed, 194 insertions(+), 15 deletions(-)

diff --git a/doc/ChangeLog b/doc/ChangeLog
index 33954e1c..28cc49e8 100644
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -1,3 +1,7 @@
+2005-04-24  Hrvoje Niksic  <hniksic@xemacs.org>
+
+	* wget.texi (Download Options): Document --prefer-family.
+
 2005-04-24  Hrvoje Niksic  <hniksic@xemacs.org>
 
 	* wget.texi (Download Options): Don't claim that -6 accepts mapped
diff --git a/doc/wget.texi b/doc/wget.texi
index e2fb09f8..bb59b4d6 100644
--- a/doc/wget.texi
+++ b/doc/wget.texi
@@ -900,6 +900,27 @@ or to deal with broken network configuration.  Only one of
 @samp{--inet6-only} and @samp{--inet4-only} may be specified in the
 same command.  Neither option is available in Wget compiled without
 IPv6 support.
+
+@item --prefer-family=IPv4/IPv6/none
+When given a choice of several addresses, connect to the addresses
+with specified address family first.  IPv4 addresses are preferred by
+default.
+
+This avoids spurious errors and correct attempts when accessing hosts
+that resolve to both IPv6 and IPv4 addresses from IPv4 networks.  For
+example, @samp{www.kame.net} resolves to
+@samp{2001:200:0:8002:203:47ff:fea5:3085} and to
+@samp{203.178.141.194}.  When the preferred family is @code{IPv4}, the
+IPv4 address is used first; when the preferred family is @code{IPv6},
+the IPv6 address is used first; if the specified value is @code{none},
+the address order returned by DNS is used without change.
+
+Unlike @samp{-4} and @samp{-6}, this option doesn't forbid access to
+any address family, it only changes the @emph{order} in which the
+addresses are accessed.  Also note that the reordering performed by
+this option is @dfn{stable}---it doesn't affect order of addresses of
+the same family.  That is, the relative order of all IPv4 addresses
+and of all IPv6 addresses remains intact in all cases.
 @end table
 
 @node Directory Options
@@ -2536,6 +2557,12 @@ the request body.  The same as @samp{--post-data}.
 Use POST as the method for all HTTP requests and send the contents of
 @var{file} in the request body.  The same as @samp{--post-file}.
 
+@item prefer_family = IPv4/IPv6/none
+When given a choice of several addresses, connect to the addresses
+with specified address family first.  IPv4 addresses are preferred by
+default.  The same as @samp{--prefer-family}, which see for a detailed
+discussion of why this is useful.
+
 @item progress = @var{string}
 Set the type of the progress indicator.  Legal types are ``dot'' and
 ``bar''.
@@ -3548,6 +3575,12 @@ Mauro Tortonesi,
 Dave Turner,
 Gisle Vanem,
 Russell Vincent,
+@iftex
+@v{Z}eljko Vrba,
+@end iftex
+@ifnottex
+Zeljko Vrba,
+@end ifnottex
 Charles G Waldman,
 Douglas E. Wegscheid,
 YAMAZAKI Makoto,
diff --git a/src/ChangeLog b/src/ChangeLog
index dbc1b27c..80df1b49 100644
--- a/src/ChangeLog
+++ b/src/ChangeLog
@@ -1,3 +1,18 @@
+2005-04-24  Hrvoje Niksic  <hniksic@xemacs.org>
+
+	* host.c (cmp_prefer_ipv4): New function.
+	(cmp_prefer_ipv6): New function.
+	(lookup_host): Use the appropriate comparator according to
+	opt.prefer_family.
+
+	* init.c: New option prefer_family.
+
+	* host.c (is_valid_ipv6_address): Spell NS_* constants in lower
+	case to avoid clash with system headers.
+	(lookup_host): Reorder the addresses so that IPv4 ones come first.
+
+	* utils.c (stable_sort): New function.
+
 2005-04-24  Hrvoje Niksic  <hniksic@xemacs.org>
 
 	* connect.c (retryable_socket_connect_error): Return 0 for
diff --git a/src/host.c b/src/host.c
index 585b93c7..5082e179 100644
--- a/src/host.c
+++ b/src/host.c
@@ -239,6 +239,32 @@ address_list_from_addrinfo (const struct addrinfo *ai)
   return al;
 }
 
+#define IS_IPV4(addr) (((const ip_address *) addr)->type == IPV4_ADDRESS)
+
+/* Compare two IP addresses by type, giving preference to the IPv4
+   address (sorting it first).  In other words, return -1 if ADDR1 is
+   IPv4 and ADDR2 is IPv6, +1 if ADDR1 is IPv6 and ADDR2 is IPv4, and
+   0 otherwise.
+
+   This is intended to be used as the comparator arg to a qsort-like
+   sorting function, which is why it accepts generic pointers.  */
+
+static int
+cmp_prefer_ipv4 (const void *addr1, const void *addr2)
+{
+  return !IS_IPV4 (addr1) - !IS_IPV4 (addr2);
+}
+
+#define IS_IPV6(addr) (((const ip_address *) addr)->type == IPV6_ADDRESS)
+
+/* Like the above, but give preference to the IPv6 address.  */
+
+static int
+cmp_prefer_ipv6 (const void *addr1, const void *addr2)
+{
+  return !IS_IPV6 (addr1) - !IS_IPV6 (addr2);
+}
+
 #else  /* not ENABLE_IPV6 */
 
 /* Create an address_list from a NULL-terminated vector of IPv4
@@ -473,10 +499,11 @@ is_valid_ipv4_address (const char *str, const char *end)
 int
 is_valid_ipv6_address (const char *str, const char *end)
 {
+  /* Use lower-case for these to avoid clash with system headers.  */
   enum {
-    NS_INADDRSZ  = 4,
-    NS_IN6ADDRSZ = 16,
-    NS_INT16SZ   = 2
+    ns_inaddrsz  = 4,
+    ns_in6addrsz = 16,
+    ns_int16sz   = 2
   };
 
   const char *curtok;
@@ -531,19 +558,19 @@ is_valid_ipv6_address (const char *str, const char *end)
 	    }
 	  else if (str == end)
 	    return 0;
-	  if (tp > NS_IN6ADDRSZ - NS_INT16SZ)
+	  if (tp > ns_in6addrsz - ns_int16sz)
 	    return 0;
-	  tp += NS_INT16SZ;
+	  tp += ns_int16sz;
 	  saw_xdigit = 0;
 	  val = 0;
 	  continue;
 	}
 
       /* if ch is a dot ... */
-      if (ch == '.' && (tp <= NS_IN6ADDRSZ - NS_INADDRSZ)
+      if (ch == '.' && (tp <= ns_in6addrsz - ns_inaddrsz)
 	  && is_valid_ipv4_address (curtok, end) == 1)
 	{
-	  tp += NS_INADDRSZ;
+	  tp += ns_inaddrsz;
 	  saw_xdigit = 0;
 	  break;
 	}
@@ -553,19 +580,19 @@ is_valid_ipv6_address (const char *str, const char *end)
 
   if (saw_xdigit == 1)
     {
-      if (tp > NS_IN6ADDRSZ - NS_INT16SZ) 
+      if (tp > ns_in6addrsz - ns_int16sz) 
 	return 0;
-      tp += NS_INT16SZ;
+      tp += ns_int16sz;
     }
 
   if (colonp != NULL)
     {
-      if (tp == NS_IN6ADDRSZ) 
+      if (tp == ns_in6addrsz) 
 	return 0;
-      tp = NS_IN6ADDRSZ;
+      tp = ns_in6addrsz;
     }
 
-  if (tp != NS_IN6ADDRSZ)
+  if (tp != ns_in6addrsz)
     return 0;
 
   return 1;
@@ -640,15 +667,20 @@ cache_remove (const char *host)
     }
 }
 
-/* Look up HOST in DNS and return a list of IP addresses.  The
-   addresses in the list are in the same order in which
-   gethostbyname/getaddrinfo returned them.
+/* Look up HOST in DNS and return a list of IP addresses.
 
    This function caches its result so that, if the same host is passed
    the second time, the addresses are returned without DNS lookup.
    (Use LH_REFRESH to force lookup, or set opt.dns_cache to 0 to
    globally disable caching.)
 
+   The order of the returned addresses is affected by the setting of
+   opt.prefer_family: if it is set to prefer_ipv4, IPv4 addresses are
+   placed at the beginning; if it is prefer_ipv6, IPv6 ones are placed
+   at the beginning; otherwise, the order is left intact.  The
+   relative order of addresses with the same family is left
+   undisturbed in either case.
+
    FLAGS can be a combination of:
      LH_SILENT  - don't print the "resolving ... done" messages.
      LH_BIND    - resolve addresses for use with bind, which under
@@ -778,6 +810,14 @@ lookup_host (const char *host, int flags)
 		   _("failed: No IPv4/IPv6 addresses for host.\n"));
 	return NULL;
       }
+
+    /* Reorder addresses so that IPv4 ones (or IPv6 ones, as per
+       --prefer-family) come first.  Sorting is stable so the order of
+       the addresses with the same family is undisturbed.  */
+    if (al->count > 1 && opt.prefer_family != prefer_none)
+      stable_sort (al->addresses, al->count, sizeof (ip_address),
+		   opt.prefer_family == prefer_ipv4
+		   ? cmp_prefer_ipv4 : cmp_prefer_ipv6);
   }
 #else  /* not ENABLE_IPV6 */
   {
diff --git a/src/init.c b/src/init.c
index 3fb9aa23..8ab1bec3 100644
--- a/src/init.c
+++ b/src/init.c
@@ -88,6 +88,7 @@ CMD_DECLARE (cmd_spec_dirstruct);
 CMD_DECLARE (cmd_spec_header);
 CMD_DECLARE (cmd_spec_htmlify);
 CMD_DECLARE (cmd_spec_mirror);
+CMD_DECLARE (cmd_spec_prefer_family);
 CMD_DECLARE (cmd_spec_progress);
 CMD_DECLARE (cmd_spec_recursive);
 CMD_DECLARE (cmd_spec_restrict_file_names);
@@ -177,6 +178,7 @@ static struct {
   { "passiveftp",	&opt.ftp_pasv,		cmd_lockable_boolean },
   { "postdata",		&opt.post_data,		cmd_string },
   { "postfile",		&opt.post_file_name,	cmd_file },
+  { "preferfamily",	NULL,			cmd_spec_prefer_family },
   { "preservepermissions", &opt.preserve_perm,	cmd_boolean },
   { "progress",		&opt.progress_type,	cmd_spec_progress },
   { "protocoldirectories", &opt.protocol_directories, cmd_boolean },
@@ -1072,6 +1074,32 @@ cmd_spec_mirror (const char *com, const char *val, void *closure)
   return 1;
 }
 
+/* Validate --prefer-family and set the choice.  Allowed values are
+   "IPv4", "IPv6", and "none".  */
+
+static int
+cmd_spec_prefer_family (const char *com, const char *val, void *closure)
+{
+  if (0 == strcasecmp (val, "ipv4"))
+    {
+      opt.prefer_family = prefer_ipv4;
+      return 1;
+    }
+  else if (0 == strcasecmp (val, "ipv6"))
+    {
+      opt.prefer_family = prefer_ipv6;
+      return 1;
+    }
+  else if (0 == strcasecmp (val, "none"))
+    {
+      opt.prefer_family = prefer_none;
+      return 1;
+    }
+  fprintf (stderr, _("%s: %s: Invalid preferred family `%s'.\n"),
+	   exec_name, com, val);
+  return 0;
+}
+
 /* Set progress.type to VAL, but verify that it's a valid progress
    implementation before that.  */
 
diff --git a/src/main.c b/src/main.c
index 718abc9e..4d2163ab 100644
--- a/src/main.c
+++ b/src/main.c
@@ -214,6 +214,7 @@ struct cmdline_option option_data[] =
     { "passive-ftp", 0, OPT_BOOLEAN, "passiveftp", -1 },
     { "post-data", 0, OPT_VALUE, "postdata", -1 },
     { "post-file", 0, OPT_VALUE, "postfile", -1 },
+    { "prefer-family", 0, OPT_VALUE, "preferfamily", -1 },
     { "preserve-permissions", 0, OPT_BOOLEAN, "preservepermissions", -1 },
     { "progress", 0, OPT_VALUE, "progress", -1 },
     { "protocol-directories", 0, OPT_BOOLEAN, "protocoldirectories", -1 },
@@ -465,6 +466,9 @@ Download:\n"),
   -4,  --inet4-only              connect only to IPv4 addresses.\n"),
     N_("\
   -6,  --inet6-only              connect only to IPv6 addresses.\n"),
+    N_("\
+       --prefer-family=FAMILY    connect first to addresses of specified family,\n\
+                                 one of IPv6, IPv4, or none.\n"),
 #endif
     "\n",
 
diff --git a/src/options.h b/src/options.h
index 90f9fc3a..d4140a0c 100644
--- a/src/options.h
+++ b/src/options.h
@@ -197,6 +197,12 @@ struct options
   int ipv4_only;		/* IPv4 connections have been requested. */
   int ipv6_only;		/* IPv4 connections have been requested. */
 #endif
+  enum {
+    prefer_ipv4,
+    prefer_ipv6,
+    prefer_none
+  } prefer_family;		/* preferred address family when more
+				   than one type is available */
 };
 
 extern struct options opt;
diff --git a/src/utils.c b/src/utils.c
index 38700e04..1afb8429 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -1975,3 +1975,49 @@ base64_decode (const char *base64, char *to)
 #undef IS_ASCII
 #undef IS_BASE64
 #undef NEXT_BASE64_CHAR
+
+/* Simple merge sort for use by stable_sort.  Implementation courtesy
+   Zeljko Vrba.  */
+
+static void
+mergesort_internal (void *base, void *temp, size_t size, size_t from, size_t to,
+		    int (*cmpfun) PARAMS ((const void *, const void *)))
+{
+#define ELT(array, pos) ((char *)(array) + (pos) * size)
+  if (from < to)
+    {
+      size_t i, j, k;
+      size_t mid = (to + from) / 2;
+      mergesort_internal (base, temp, size, from, mid, cmpfun);
+      mergesort_internal (base, temp, size, mid + 1, to, cmpfun);
+      i = from;
+      j = mid + 1;
+      for (k = from; (i <= mid) && (j <= to); k++)
+	if (cmpfun (ELT (base, i), ELT (base, j)) <= 0)
+	  memcpy (ELT (temp, k), ELT (base, i++), size);
+	else
+	  memcpy (ELT (temp, k), ELT (base, j++), size);
+      while (i <= mid)
+	memcpy (ELT (temp, k++), ELT (base, i++), size);
+      while (j <= to)
+	memcpy (ELT (temp, k++), ELT (base, j++), size);
+      for (k = from; k <= to; k++)
+	memcpy (ELT (base, k), ELT (temp, k), size);
+    }
+#undef ELT
+}
+
+/* Stable sort with interface exactly like standard library's qsort.
+   Uses mergesort internally, allocating temporary storage with
+   alloca.  */
+
+void
+stable_sort (void *base, size_t nmemb, size_t size,
+	     int (*cmpfun) PARAMS ((const void *, const void *)))
+{
+  if (size > 1)
+    {
+      void *temp = alloca (nmemb * size * sizeof (void *));
+      mergesort_internal (base, temp, size, 0, nmemb - 1, cmpfun);
+    }
+}
diff --git a/src/utils.h b/src/utils.h
index 57992a60..51816a08 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -119,4 +119,7 @@ void xsleep PARAMS ((double));
 int base64_encode PARAMS ((const char *, int, char *));
 int base64_decode PARAMS ((const char *, char *));
 
+void stable_sort PARAMS ((void *, size_t, size_t,
+                          int (*) (const void *, const void *)));
+
 #endif /* UTILS_H */