From 00ae9b4ee2caa6640776e9777e7ef5a374d082c0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tim=20R=C3=BChsen?= Date: Thu, 3 Nov 2016 16:41:05 +0100 Subject: [PATCH] Move Wget from IDN2003 (libidn) to IDN2008 (libidn2) * .travis.yml: Install libidn2-dev instead libidn11-dev. * bootstrap.conf: Add modules libunistring-optional, unistr/base, unicase/tolower. * configure.ac: Check for libidn2. * src/Makefile.am: Add $(LTLIBUNISTRING) to LDADD. * tests/Makefile.am: Set LDADD similar to LDADD in src/Makefile.am * src/connect.c: Use libidn2 code instead of libidn. * src/host.c: Likewise. * src/iri.c: Likewise. * src/iri.h: Likewise. * src/options.h: Likewise. * src/url.c: Likewise. * src/url.h: Likewise. * src/log.c: Fix C99 comment. IDN2003 should not be used any more due to security concerns. We use libunistring (resp. the unicode code from gnulib) for lowercasing UTF-8 before we give data to libidn2. TR#46 is missing, no support in libidn2 nor in libunistring. --- .travis.yml | 2 +- bootstrap.conf | 3 +++ configure.ac | 49 +++++++++++++++--------------------- src/Makefile.am | 2 +- src/connect.c | 4 +-- src/host.c | 4 +-- src/iri.c | 64 +++++++++++++++++++++++++++++++++-------------- src/iri.h | 7 +++--- src/log.c | 4 +-- src/options.h | 2 +- src/url.c | 2 +- src/url.h | 4 +-- tests/Makefile.am | 5 +++- 13 files changed, 87 insertions(+), 65 deletions(-) diff --git a/.travis.yml b/.travis.yml index aa9f458b..c81e0a93 100644 --- a/.travis.yml +++ b/.travis.yml @@ -36,7 +36,7 @@ addons: - make - libhttp-daemon-perl - libio-socket-ssl-perl - - libidn11-dev + - libidn2-dev - gettext - texlive - python3 diff --git a/bootstrap.conf b/bootstrap.conf index 54553bce..fbdbe6b9 100644 --- a/bootstrap.conf +++ b/bootstrap.conf @@ -101,6 +101,9 @@ tmpdir unlink unlocked-io update-copyright +libunistring-optional +unistr/base +unicase/tolower vasprintf vsnprintf write diff --git a/configure.ac b/configure.ac index 20cdbd25..49657762 100644 --- a/configure.ac +++ b/configure.ac @@ -640,9 +640,6 @@ AC_ARG_ENABLE(iri, ] ) -AC_ARG_WITH(libidn, AC_HELP_STRING([--with-libidn=[DIR]], - [Support IDN/IRIs (needs GNU Libidn)]), - libidn=$withval, libidn="") AS_IF([test "X$iri" != "Xno"],[ AM_ICONV @@ -660,33 +657,26 @@ AS_IF([test "X$iri" != "Xno"],[ LIBICONV= ]) +AC_ARG_WITH(libidn, AC_HELP_STRING([--with-libidn=[DIR]], + [Support IDN2008/IRIs (needs GNU libidn2 + libunicode)]), + libidn=$withval, libidn="") if test "X$iri" != "Xno"; then - if test "$libidn" != ""; then - LDFLAGS="${LDFLAGS} -L$libidn/lib" - CPPFLAGS="${CPPFLAGS} -I$libidn/include" - fi + AS_IF([test "x$with_libidn2" != xno], [ + AC_SEARCH_LIBS(idn2_lookup_u8, idn2, + [with_libidn2=yes; AC_DEFINE([ENABLE_IRI], 1, [Define if IRI support is enabled.])], + [with_libidn2=no; iri=no; AC_MSG_WARN(*** LIBIDN2 was not found. You will not be able to use IDN2008 support)]) - # If idna.h can't be found, check to see if it was installed under - # /usr/include/idn (OpenSolaris, at least, places it there). - # Check for idn-int.h in that case, because idna.h won't find - # idn-int.h until we've decided to add -I/usr/include/idn. - AC_CHECK_HEADER(idna.h, , - [AC_CHECK_HEADER(idn/idn-int.h, - [CPPFLAGS="${CPPFLAGS} -I/usr/include/idn"], - [iri=no])] - ) - - if test "X$iri" != "Xno"; then - AC_CHECK_LIB(idn, stringprep_check_version, - [iri=yes LIBS="${LIBS} -lidn"], iri=no) - fi - - if test "X$iri" != "Xno" ; then - AC_DEFINE([ENABLE_IRI], 1, [Define if IRI support is enabled.]) - AC_MSG_NOTICE([Enabling support for IRI.]) - else - AC_MSG_WARN([Libidn not found]) - fi +# AS_IF([test "x$with_libidn2" = xyes], [ +# AC_SEARCH_LIBS(u8_tolower, unistring, +# [AC_DEFINE([ENABLE_IRI], 1, [Define if IRI support is enabled.])], +# [iri=no; AC_MSG_WARN(*** LIBUNISTRING was not found. You will not be able to use IDN2008 support)]) +# ]) + ]) +fi +if test "X$iri" = "Xno"; then + # we don't need libunistring - clear settings from gnulib module + LIBUNISTRING="" + LTLIBUNISTRING="" fi dnl @@ -755,7 +745,7 @@ dnl dnl Check for libcares (resolver library) dnl -AS_IF([test "X$with_cares" == "Xyes"],[ +AS_IF([test "X$with_cares" = "Xyes"],[ PKG_CHECK_MODULES([CARES], libcares, [ CFLAGS="$CARES_CFLAGS $CFLAGS" AC_CHECK_HEADER(ares.h, [ @@ -839,4 +829,5 @@ AC_MSG_NOTICE([Summary of build options: Metalink: $with_metalink Resolver: $RESOLVER_INFO GPGME: $have_gpg + IRI: $iri ]) diff --git a/src/Makefile.am b/src/Makefile.am index 5311bba2..526d458a 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -65,7 +65,7 @@ nodist_wget_SOURCES = version.c EXTRA_wget_SOURCES = iri.c LDADD = $(LIBOBJS) ../lib/libgnu.a $(GETADDRINFO_LIB) $(HOSTENT_LIB) $(INET_NTOP_LIB) $(LIBSOCKET)\ $(LIB_CLOCK_GETTIME) $(LIB_CRYPTO) $(LIB_SELECT) $(LTLIBICONV) $(LTLIBINTL) $(LTLIBTHREAD)\ - $(SERVENT_LIB) + $(LTLIBUNISTRING) $(SERVENT_LIB) AM_CPPFLAGS = -I$(top_builddir)/lib -I$(top_srcdir)/lib diff --git a/src/connect.c b/src/connect.c index 60b0294a..e41f1484 100644 --- a/src/connect.c +++ b/src/connect.c @@ -56,7 +56,7 @@ as that of the covered work. */ #include #ifdef ENABLE_IRI -#include +#include #endif #include "utils.h" @@ -283,7 +283,7 @@ connect_to_ip (const ip_address *ip, int port, const char *print) str = xmalloc (len); snprintf (str, len, "%s (%s)", name, print); str[len-1] = '\0'; - idn_free (name); + idn2_free (name); } logprintf (LOG_VERBOSE, _("Connecting to %s|%s|:%d... "), diff --git a/src/host.c b/src/host.c index 414a2949..8d58271b 100644 --- a/src/host.c +++ b/src/host.c @@ -58,7 +58,7 @@ as that of the covered work. */ #include #ifdef ENABLE_IRI -#include +#include #endif #include "utils.h" @@ -852,7 +852,7 @@ lookup_host (const char *host, int flags) str = xmalloc (len); snprintf (str, len, "%s (%s)", name, host); str[len-1] = '\0'; - idn_free (name); + idn2_free (name); } logprintf (LOG_VERBOSE, _("Resolving %s... "), diff --git a/src/iri.c b/src/iri.c index 8b3e78cd..321828df 100644 --- a/src/iri.c +++ b/src/iri.c @@ -33,13 +33,14 @@ as that of the covered work. */ #include #include #include +#include +#include #ifdef HAVE_ICONV # include #endif -#include -#include -#include -#include +#include +#include +#include #include "utils.h" #include "url.h" @@ -90,10 +91,15 @@ parse_charset (const char *str) } /* Find the locale used, or fall back on a default value */ -char * +const char * find_locale (void) { - return (char *) stringprep_locale_charset (); + const char *encoding = nl_langinfo(CODESET); + + if (!encoding || !*encoding) + return "ASCII"; + + return encoding; } /* Basic check of an encoding name. */ @@ -284,33 +290,47 @@ idn_encode (const struct iri *i, const char *host) int ret; char *ascii_encoded; char *utf8_encoded = NULL; + const char *src; + uint8_t *lower; + size_t len = 0; /* Encode to UTF-8 if not done */ if (!i->utf8_encode) { if (!remote_to_utf8 (i, host, &utf8_encoded)) return NULL; /* Nothing to encode or an error occured */ + src = utf8_encoded; } + else + src = host; - if (!_utf8_is_valid(utf8_encoded ? utf8_encoded : host)) + if (!_utf8_is_valid (src)) { logprintf (LOG_VERBOSE, _("Invalid UTF-8 sequence: %s\n"), - quote(utf8_encoded ? utf8_encoded : host)); + quote (src)); xfree (utf8_encoded); return NULL; } - /* Store in ascii_encoded the ASCII UTF-8 NULL terminated string */ - ret = idna_to_ascii_8z (utf8_encoded ? utf8_encoded : host, &ascii_encoded, IDNA_FLAGS); - xfree (utf8_encoded); - - if (ret != IDNA_SUCCESS) + /* we need a conversion to lowercase */ + lower = u8_tolower ((uint8_t *) src, u8_strlen ((uint8_t *) src) + 1, 0, UNINORM_NFKC, NULL, &len); + if (!lower) { - logprintf (LOG_VERBOSE, _("idn_encode failed (%d): %s\n"), ret, - quote (idna_strerror (ret))); + logprintf (LOG_VERBOSE, _("Failed to convert to lower: %d: %s\n"), + errno, quote (src)); + xfree (utf8_encoded); return NULL; } + if ((ret = idn2_lookup_u8 (lower, (uint8_t **) &ascii_encoded, IDN2_NFC_INPUT)) != IDN2_OK) + { + logprintf (LOG_VERBOSE, _("idn_encode failed (%d): %s\n"), ret, + quote (idn2_strerror (ret))); + ascii_encoded = NULL; + } + + xfree (lower); + return ascii_encoded; } @@ -319,18 +339,24 @@ idn_encode (const struct iri *i, const char *host) char * idn_decode (const char *host) { +/* char *new; int ret; - ret = idna_to_unicode_8zlz (host, &new, IDNA_FLAGS); - if (ret != IDNA_SUCCESS) + ret = idn2_register_u8 (NULL, host, (uint8_t **) &new, 0); + if (ret != IDN2_OK) { - logprintf (LOG_VERBOSE, _("idn_decode failed (%d): %s\n"), ret, - quote (idna_strerror (ret))); + logprintf (LOG_VERBOSE, _("idn2_register_u8 failed (%d): %s: %s\n"), ret, + quote (idn2_strerror (ret)), host); return NULL; } return new; +*/ + /* idn2_register_u8() just works label by label. + * That is pretty much overhead for just displaying the original ulabels. + * To keep at least the debug output format, return a cloned host. */ + return xstrdup(host); } /* Try to transcode string str from remote encoding to UTF-8. On success, *new diff --git a/src/iri.h b/src/iri.h index c350cbf8..ba64a275 100644 --- a/src/iri.h +++ b/src/iri.h @@ -40,11 +40,10 @@ struct iri { #ifdef ENABLE_IRI -# include -# include +# include char *parse_charset (const char *str); -char *find_locale (void); +const char *find_locale (void); bool check_encoding_name (const char *encoding); const char *locale_to_utf8 (const char *str); char *idn_encode (const struct iri *i, const char *host); @@ -66,7 +65,7 @@ extern struct iri dummy_iri; #define locale_to_utf8(str) (str) #define idn_encode(a,b) NULL #define idn_decode(str) NULL -#define idn_free(str) ((void)0) +#define idn2_free(str) ((void)0) #define remote_to_utf8(a,b,c) false #define iri_new() (&dummy_iri) #define iri_dup(a) (&dummy_iri) diff --git a/src/log.c b/src/log.c index e122238c..e068acf6 100644 --- a/src/log.c +++ b/src/log.c @@ -964,12 +964,12 @@ check_redirect_output (void) { if (tcgetpgrp (STDIN_FILENO) != getpgrp ()) { - // Process backgrounded + /* Process backgrounded */ redirect_output (true,NULL); } else { - // Process foregrounded + /* Process foregrounded */ redirect_output (false,NULL); } } diff --git a/src/options.h b/src/options.h index 9fcefc35..d713acc5 100644 --- a/src/options.h +++ b/src/options.h @@ -311,7 +311,7 @@ struct options bool enable_iri; char *encoding_remote; - char *locale; + const char *locale; bool trustservernames; #ifdef __VMS diff --git a/src/url.c b/src/url.c index bd6570ad..8951c717 100644 --- a/src/url.c +++ b/src/url.c @@ -1213,7 +1213,7 @@ url_free (struct url *url) if (url) { if (url->idn_allocated) { - idn_free (url->host); /* A dummy if !defined(ENABLE_IRI) */ + idn2_free (url->host); /* A dummy if !defined(ENABLE_IRI) */ url->host = NULL; } else diff --git a/src/url.h b/src/url.h index f4ffe20f..94d1528c 100644 --- a/src/url.h +++ b/src/url.h @@ -100,8 +100,8 @@ struct url char *user; char *passwd; - /* 'host' is allocated by idna_to_ascii_8z() via idn_encode(). - * Call 'idn_free()' to free this memory. */ + /* 'host' is allocated by idn2_lookup_u8() via idn_encode(). + * Call 'idn2_free()' to free this memory. */ bool idn_allocated; }; diff --git a/tests/Makefile.am b/tests/Makefile.am index 902af014..3af00692 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -136,7 +136,10 @@ EXTRA_DIST = FTPServer.pm FTPTest.pm HTTPServer.pm HTTPTest.pm \ check_PROGRAMS = unit-tests unit_tests_SOURCES = -LDADD = ../src/libunittest.a ../lib/libgnu.a @LIBICONV@ @LIBINTL@ $(LIBS) $(LIB_CLOCK_GETTIME) +LDADD = ../src/libunittest.a ../lib/libgnu.a $(GETADDRINFO_LIB) $(HOSTENT_LIB) $(INET_NTOP_LIB) $(LIBSOCKET)\ + $(LIB_CLOCK_GETTIME) $(LIB_CRYPTO) $(LIB_SELECT) $(LTLIBICONV) $(LTLIBINTL) $(LTLIBTHREAD)\ + $(LTLIBUNISTRING) $(SERVENT_LIB) + CLEANFILES = *~ *.bak core core.[0-9]*