Fix charset transcoding issue for non-reversible codepoints

* src/url.c: Check iconv() against 0, not -1

On some libiconv implementations, unknown codepoints are
encoded as '?', e.g. when converting a non-ASCII codepoint to ASCII.
This results in ambiguous file names, which also causes our tests to fail.
This commit is contained in:
Tim Rühsen 2017-04-16 19:55:14 +02:00
parent fc2f4233ed
commit 92bfe2a2e4
2 changed files with 45 additions and 40 deletions

2
gnulib

@ -1 +1 @@
Subproject commit 683b6078961f10905baba598c469402ed0133425 Subproject commit f497bc109ac365359ce499aeaef74f39c7e4e8c7

View File

@ -1567,9 +1567,9 @@ convert_fname (char *fname)
to_encoding = nl_langinfo (CODESET); to_encoding = nl_langinfo (CODESET);
cd = iconv_open (to_encoding, from_encoding); cd = iconv_open (to_encoding, from_encoding);
if (cd == (iconv_t)(-1)) if (cd == (iconv_t) (-1))
logprintf (LOG_VERBOSE, _("Conversion from %s to %s isn't supported\n"), logprintf (LOG_VERBOSE, _ ("Conversion from %s to %s isn't supported\n"),
quote (from_encoding), quote (to_encoding)); quote (from_encoding), quote (to_encoding));
else else
{ {
inlen = strlen (fname); inlen = strlen (fname);
@ -1578,44 +1578,49 @@ convert_fname (char *fname)
done = 0; done = 0;
for (;;) for (;;)
{ {
if (iconv (cd, (ICONV_CONST char **) &fname, &inlen, &s, &outlen) != (size_t)(-1) errno = 0;
&& iconv (cd, NULL, NULL, &s, &outlen) != (size_t)(-1)) if (iconv (cd, (ICONV_CONST char **) &fname, &inlen, &s, &outlen) == 0
{ && iconv (cd, NULL, NULL, &s, &outlen) == 0)
*(converted_fname + len - outlen - done) = '\0'; {
iconv_close(cd); *(converted_fname + len - outlen - done) = '\0';
DEBUGP (("Converted file name '%s' (%s) -> '%s' (%s)\n", iconv_close (cd);
orig_fname, from_encoding, converted_fname, to_encoding)); DEBUGP (("Converted file name '%s' (%s) -> '%s' (%s)\n",
xfree (orig_fname); orig_fname, from_encoding, converted_fname, to_encoding));
return converted_fname; xfree (orig_fname);
} return converted_fname;
}
/* Incomplete or invalid multibyte sequence */ /* Incomplete or invalid multibyte sequence */
if (errno == EINVAL || errno == EILSEQ) if (errno == EINVAL || errno == EILSEQ || errno == 0)
{ {
logprintf (LOG_VERBOSE, if (errno)
_("Incomplete or invalid multibyte sequence encountered\n")); logprintf (LOG_VERBOSE,
xfree (converted_fname); _ ("Incomplete or invalid multibyte sequence encountered\n"));
converted_fname = (char *)orig_fname; else
break; logprintf (LOG_VERBOSE,
} _ ("Unconvertable multibyte sequence encountered\n"));
else if (errno == E2BIG) /* Output buffer full */ xfree (converted_fname);
{ converted_fname = (char *) orig_fname;
done = len; break;
len = outlen = done + inlen * 2; }
converted_fname = xrealloc (converted_fname, outlen + 1); else if (errno == E2BIG) /* Output buffer full */
s = converted_fname + done; {
} done = len;
else /* Weird, we got an unspecified error */ len = outlen = done + inlen * 2;
{ converted_fname = xrealloc (converted_fname, outlen + 1);
logprintf (LOG_VERBOSE, _("Unhandled errno %d\n"), errno); s = converted_fname + done;
xfree (converted_fname); }
converted_fname = (char *)orig_fname; else /* Weird, we got an unspecified error */
break; {
} logprintf (LOG_VERBOSE, _ ("Unhandled errno %d\n"), errno);
} xfree (converted_fname);
converted_fname = (char *) orig_fname;
break;
}
}
DEBUGP (("Failed to convert file name '%s' (%s) -> '?' (%s)\n", DEBUGP (("Failed to convert file name '%s' (%s) -> '?' (%s)\n",
orig_fname, from_encoding, to_encoding)); orig_fname, from_encoding, to_encoding));
} }
iconv_close(cd); iconv_close(cd);