From 2ba8da8f22239564ac9964481507d89a2b399a05 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Tim=20R=C3=BChsen?= <tim.ruehsen@gmx.de>
Date: Mon, 6 May 2019 17:11:48 +0200
Subject: [PATCH] Replace XDIGIT_TO_NUM by _unhex() with proper unsigned
 handling

* src/wget.h: Replace XDIGIT_TO_NUM by _unhex()
* src/html-parse.c (decode_entity): Use _unhex()
* src/host.c (is_valid_ipv6_address): Use _unhex()
* src/url.c (url_unescape_1): Use unsigned char to avoid UB
* src/res.c (DECODE_MAYBE): Likewise

Code taken from Wget2. XDIGIT_TO_NUM had a signedness issue, detected by
the scan-build static analyzer.
---
 src/host.c       | 2 +-
 src/html-parse.c | 2 +-
 src/res.c        | 2 +-
 src/url.c        | 6 +++---
 src/wget.h       | 9 ++++++---
 5 files changed, 12 insertions(+), 9 deletions(-)

diff --git a/src/host.c b/src/host.c
index e4f0ffb2..c3b91d23 100644
--- a/src/host.c
+++ b/src/host.c
@@ -516,7 +516,7 @@ is_valid_ipv6_address (const char *str, const char *end)
       if (c_isxdigit (ch))
         {
           val <<= 4;
-          val |= XDIGIT_TO_NUM (ch);
+          val |= _unhex (ch);
           if (val > 0xffff)
             return false;
           saw_xdigit = true;
diff --git a/src/html-parse.c b/src/html-parse.c
index da8fd767..44c1418e 100644
--- a/src/html-parse.c
+++ b/src/html-parse.c
@@ -386,7 +386,7 @@ decode_entity (const char **ptr, const char *end)
         value = 0;
         if (*p == 'x')
           for (++p; value < 256 && p < end && c_isxdigit (*p); p++, digits++)
-            value = (value << 4) + XDIGIT_TO_NUM (*p);
+            value = (value << 4) + _unhex (*p);
         else
           for (; value < 256 && p < end && c_isdigit (*p); p++, digits++)
             value = (value * 10) + (*p - '0');
diff --git a/src/res.c b/src/res.c
index f8281112..9b7f3667 100644
--- a/src/res.c
+++ b/src/res.c
@@ -419,7 +419,7 @@ free_specs (struct robot_specs *specs)
 #define DECODE_MAYBE(c, ptr) do {                               \
   if (c == '%' && c_isxdigit (ptr[1]) && c_isxdigit (ptr[2]))       \
     {                                                           \
-      char decoded = X2DIGITS_TO_NUM (ptr[1], ptr[2]);          \
+      unsigned char decoded = X2DIGITS_TO_NUM (ptr[1], ptr[2]);          \
       if (decoded != '/')                                       \
         {                                                       \
           c = decoded;                                          \
diff --git a/src/url.c b/src/url.c
index 7df640ac..0fb1349f 100644
--- a/src/url.c
+++ b/src/url.c
@@ -175,8 +175,8 @@ static const unsigned char urlchr_table[256] =
 static void
 url_unescape_1 (char *s, unsigned char mask)
 {
-  char *t = s;                  /* t - tortoise */
-  char *h = s;                  /* h - hare     */
+  unsigned char *t = (unsigned char *) s; /* t - tortoise */
+  unsigned char *h = (unsigned char *) s; /* h - hare     */
 
   for (; *h; h++, t++)
     {
@@ -187,7 +187,7 @@ url_unescape_1 (char *s, unsigned char mask)
         }
       else
         {
-          char c;
+          unsigned char c;
           /* Do nothing if '%' is not followed by two hex digits. */
           if (!h[1] || !h[2] || !(c_isxdigit (h[1]) && c_isxdigit (h[2])))
             goto copychar;
diff --git a/src/wget.h b/src/wget.h
index 85438338..0cf3f2ca 100644
--- a/src/wget.h
+++ b/src/wget.h
@@ -239,10 +239,13 @@ typedef double SUM_SIZE_INT;
 #define xzero(x) memset (&(x), '\0', sizeof (x))
 
 /* Convert an ASCII hex digit to the corresponding number between 0
-   and 15.  H should be a hexadecimal digit that satisfies isxdigit;
+   and 15.  c should be a hexadecimal digit that satisfies c_isxdigit;
    otherwise, the result is undefined.  */
-#define XDIGIT_TO_NUM(h) ((h) < 'A' ? (h) - '0' : c_toupper (h) - 'A' + 10)
-#define X2DIGITS_TO_NUM(h1, h2) ((XDIGIT_TO_NUM (h1) << 4) + XDIGIT_TO_NUM (h2))
+static inline unsigned char _unhex(unsigned char c) /* inline: defined in a header, not every includer calls it */
+{
+	return c <= '9' ? c - '0' : (c <= 'F' ? c - 'A' + 10 : c - 'a' + 10); /* '0'-'9', then 'A'-'F', else 'a'-'f' */
+}
+#define X2DIGITS_TO_NUM(h1, h2) ((_unhex (h1) << 4) + _unhex (h2))
 
 /* The reverse of the above: convert a number in the [0, 16) range to
    the ASCII representation of the corresponding hexadecimal digit.