diff --git a/src/html-url.c b/src/html-url.c
index 0d580f9a..5a0682d3 100644
--- a/src/html-url.c
+++ b/src/html-url.c
@@ -274,7 +274,6 @@ append_url (const char *link_uri,
struct urlpos *newel;
const char *base = ctx->base ? ctx->base : ctx->parent_base;
struct url *url;
- bool utf8_encode = false;
if (!base)
{
@@ -293,7 +292,9 @@ append_url (const char *link_uri,
return NULL;
}
- url = url_parse (link_uri, NULL, &utf8_encode);
+ set_ugly_no_encode (true);
+ url = url_parse (link_uri, NULL);
+ set_ugly_no_encode (false);
if (!url)
{
DEBUGP (("%s: link \"%s\" doesn't parse.\n",
@@ -312,7 +313,9 @@ append_url (const char *link_uri,
DEBUGP (("%s: merge(\"%s\", \"%s\") -> %s\n",
ctx->document_file, base, link_uri, complete_uri));
- url = url_parse (complete_uri, NULL, &utf8_encode);
+ set_ugly_no_encode (true);
+ url = url_parse (complete_uri, NULL);
+ set_ugly_no_encode (false);
if (!url)
{
DEBUGP (("%s: merged link \"%s\" doesn't parse.\n",
@@ -661,7 +664,6 @@ get_urls_file (const char *file)
struct file_memory *fm;
struct urlpos *head, *tail;
const char *text, *text_end;
- bool utf8_encode = false;
/* Load the file. */
fm = read_file (file);
@@ -713,7 +715,9 @@ get_urls_file (const char *file)
url_text = merged;
}
- url = url_parse (url_text, &up_error_code, &utf8_encode);
+ set_ugly_no_encode (true);
+ url = url_parse (url_text, &up_error_code);
+ set_ugly_no_encode (false);
if (!url)
{
logprintf (LOG_NOTQUIET, _("%s: Invalid URL %s: %s\n"),
diff --git a/src/iri.c b/src/iri.c
index 32eb7210..e5be2cf8 100644
--- a/src/iri.c
+++ b/src/iri.c
@@ -43,6 +43,8 @@ as that of the covered work. */
char *remote;
char *current;
+static bool utf8_encode;    /* read by url_parse via get_utf8_encode () */
+static bool ugly_no_encode; /* when true, url_parse skips UTF-8 conversion */
static iconv_t locale2utf8;
@@ -358,3 +360,38 @@ set_remote_as_current (void)
remote = current ? xstrdup (current) : NULL;
}
+/* Reset the global UTF-8 encode flag to opt.enable_iri.  */
+void
+reset_utf8_encode (void)
+{
+ set_utf8_encode (opt.enable_iri);
+}
+
+/* Set the global UTF-8 encode flag to ENCODE.  */
+void
+set_utf8_encode (bool encode)
+{
+ utf8_encode = encode;
+}
+
+/* Return the current value of the global UTF-8 encode flag.  */
+bool
+get_utf8_encode (void)
+{
+ return utf8_encode;
+}
+
+/* Set the global "no encode" override flag to UGLY.  */
+void
+set_ugly_no_encode (bool ugly)
+{
+ ugly_no_encode = ugly;
+}
+
+/* Return the current value of the global "no encode" override flag.  */
+bool
+get_ugly_no_encode (void)
+{
+ return ugly_no_encode;
+}
+
diff --git a/src/iri.h b/src/iri.h
index 837dbfdd..413fb2f6 100644
--- a/src/iri.h
+++ b/src/iri.h
@@ -46,6 +46,13 @@ void set_current_charset (char *charset);
void set_remote_charset (char *charset);
void set_remote_as_current (void);
bool remote_to_utf8 (const char *str, const char **new);
+void reset_utf8_encode (void);
+void set_utf8_encode (bool encode);
+bool get_utf8_encode (void);
+
+/* Ugly hack: global override; while set, url_parse skips UTF-8 conversion.  */
+void set_ugly_no_encode (bool ugly);
+bool get_ugly_no_encode (void);
#else /* ENABLE_IRI */
@@ -63,6 +70,12 @@ bool remote_to_utf8 (const char *str, const char **new);
#define set_remote_charset(str)
#define set_remote_as_current()
#define remote_to_utf8(a,b) false
+/* Stubs for the encode flags, so unconditional callers still build.  */
+#define reset_utf8_encode()
+#define set_utf8_encode(a)
+#define get_utf8_encode() false
+#define set_ugly_no_encode(a)
+#define get_ugly_no_encode() false
#endif /* ENABLE_IRI */
#endif /* IRI_H */
diff --git a/src/main.c b/src/main.c
index d0ff1d21..bf49bf89 100644
--- a/src/main.c
+++ b/src/main.c
@@ -1191,6 +1191,7 @@ WARNING: Can't reopen standard output in binary mode;\n\
int dt;
set_current_as_locale ();
+ set_ugly_no_encode (false);
if ((opt.recursive || opt.page_requisites)
&& (url_scheme (*t) != SCHEME_FTP || url_uses_proxy (*t)))
diff --git a/src/recur.c b/src/recur.c
index d8279c39..6f5da2ae 100644
--- a/src/recur.c
+++ b/src/recur.c
@@ -190,7 +190,6 @@ uerr_t
retrieve_tree (const char *start_url)
{
uerr_t status = RETROK;
- bool utf8_encode = false;
/* The queue of URLs we need to load. */
struct url_queue *queue;
@@ -200,8 +199,11 @@ retrieve_tree (const char *start_url)
struct hash_table *blacklist;
int up_error_code;
- struct url *start_url_parsed = url_parse (start_url, &up_error_code, &utf8_encode);
+ struct url *start_url_parsed;
+ set_ugly_no_encode (true); /* parse the start URL without UTF-8 conversion */
+ start_url_parsed = url_parse (start_url, &up_error_code);
+ set_ugly_no_encode (false);
if (!start_url_parsed)
{
logprintf (LOG_NOTQUIET, "%s: %s.\n", start_url,
@@ -338,7 +340,9 @@ retrieve_tree (const char *start_url)
if (children)
{
struct urlpos *child = children;
- struct url *url_parsed = url_parsed = url_parse (url, NULL, &utf8_encode);
+ set_ugly_no_encode (true);
+ struct url *url_parsed = url_parse (url, NULL);
+ set_ugly_no_encode (false);
char *referer_url = url;
bool strip_auth = (url_parsed != NULL
&& url_parsed->user != NULL);
@@ -641,13 +645,14 @@ descend_redirect_p (const char *redirected, const char *original, int depth,
struct url *orig_parsed, *new_parsed;
struct urlpos *upos;
bool success;
- bool utf8_encode = false;
- orig_parsed = url_parse (original, NULL, &utf8_encode);
+ set_ugly_no_encode (true);
+ orig_parsed = url_parse (original, NULL);
assert (orig_parsed != NULL);
- new_parsed = url_parse (redirected, NULL, &utf8_encode);
+ new_parsed = url_parse (redirected, NULL);
assert (new_parsed != NULL);
+ set_ugly_no_encode (false);
upos = xnew0 (struct urlpos);
upos->url = new_parsed;
diff --git a/src/retr.c b/src/retr.c
index 02106081..dd4978a7 100644
--- a/src/retr.c
+++ b/src/retr.c
@@ -613,8 +613,6 @@ retrieve_url (const char *origurl, char **file, char **newloc,
char *saved_post_data = NULL;
char *saved_post_file_name = NULL;
- bool utf8_encoded = opt.enable_iri;
-
/* If dt is NULL, use local storage. */
if (!dt)
{
@@ -627,8 +625,10 @@ retrieve_url (const char *origurl, char **file, char **newloc,
if (file)
*file = NULL;
+ reset_utf8_encode ();
+
second_try:
- u = url_parse (url, &up_error_code, &utf8_encoded);
+ u = url_parse (url, &up_error_code);
if (!u)
{
logprintf (LOG_NOTQUIET, "%s: %s.\n", url, url_error (up_error_code));
@@ -652,9 +652,10 @@ retrieve_url (const char *origurl, char **file, char **newloc,
if (proxy)
{
/* sXXXav : support IRI for proxy */
- bool proxy_utf8_encode = false;
/* Parse the proxy URL. */
- proxy_url = url_parse (proxy, &up_error_code, &proxy_utf8_encode);
+ set_ugly_no_encode (true);
+ proxy_url = url_parse (proxy, &up_error_code);
+ set_ugly_no_encode (false);
if (!proxy_url)
{
logprintf (LOG_NOTQUIET, _("Error parsing proxy URL %s: %s.\n"),
@@ -729,10 +730,10 @@ retrieve_url (const char *origurl, char **file, char **newloc,
xfree (mynewloc);
mynewloc = construced_newloc;
- utf8_encoded = opt.enable_iri;
+ reset_utf8_encode ();
/* Now, see if this new location makes sense. */
- newloc_parsed = url_parse (mynewloc, &up_error_code, &utf8_encoded);
+ newloc_parsed = url_parse (mynewloc, &up_error_code);
if (!newloc_parsed)
{
logprintf (LOG_NOTQUIET, "%s: %s.\n", escnonprint_uri (mynewloc),
@@ -780,9 +781,9 @@ retrieve_url (const char *origurl, char **file, char **newloc,
}
/* Try to not encode in UTF-8 if fetching failed */
- if (!(*dt & RETROKF) && utf8_encoded)
+ if (!(*dt & RETROKF) && get_utf8_encode ())
{
- utf8_encoded = false;
+ set_utf8_encode (false);
/*printf ("[Fallbacking to non-utf8 for `%s'\n", url);*/
goto second_try;
}
@@ -1036,8 +1037,12 @@ getproxy (struct url *u)
bool
url_uses_proxy (const char *url)
{
- bool ret, utf8_encode = false;
- struct url *u = url_parse (url, NULL, &utf8_encode);
+ bool ret;
+ struct url *u;
+ /* Parse URL with the UTF-8 conversion step disabled.  */
+ set_ugly_no_encode (true);
+ u = url_parse (url, NULL);
+ set_ugly_no_encode (false);
if (!u)
return false;
ret = getproxy (u) != NULL;
diff --git a/src/url.c b/src/url.c
index 32de9c75..c9489597 100644
--- a/src/url.c
+++ b/src/url.c
@@ -641,7 +641,7 @@ static const char *parse_errors[] = {
error, and if ERROR is not NULL, also set *ERROR to the appropriate
error code. */
struct url *
-url_parse (const char *url, int *error, bool *utf8_encode)
+url_parse (const char *url, int *error)
{
struct url *u;
const char *p;
@@ -671,12 +671,14 @@ url_parse (const char *url, int *error, bool *utf8_encode)
goto error;
}
- if (opt.enable_iri && *utf8_encode)
+ if (opt.enable_iri && get_utf8_encode () && !get_ugly_no_encode ())
{
const char *new;
+ bool utf8_encode;
url_unescape ((char *) url);
- *utf8_encode = remote_to_utf8 (url, &new);
- if (*utf8_encode)
+ utf8_encode = remote_to_utf8 (url, &new);
+ set_utf8_encode (utf8_encode);
+ if (utf8_encode)
url = new;
}
@@ -856,7 +858,10 @@ url_parse (const char *url, int *error, bool *utf8_encode)
if (opt.enable_iri)
{
- char *new = idn_encode (u->host, *utf8_encode);
+ /* Callers that set the no-encode override used to pass a false flag
+ here; keep a stale global flag from leaking into their parse.  */
+ bool utf8_flag = get_utf8_encode () && !get_ugly_no_encode ();
+ char *new = idn_encode (u->host, utf8_flag);
if (new)
{
xfree (u->host);
diff --git a/src/url.h b/src/url.h
index a174568e..7c8bcfed 100644
--- a/src/url.h
+++ b/src/url.h
@@ -84,7 +84,7 @@ struct url
char *url_escape (const char *);
-struct url *url_parse (const char *, int *, bool *);
+struct url *url_parse (const char *, int *);
const char *url_error (int);
char *url_full_path (const struct url *);
void url_set_dir (struct url *, const char *);