diff --git a/src/html-url.c b/src/html-url.c index 0d580f9a..5a0682d3 100644 --- a/src/html-url.c +++ b/src/html-url.c @@ -274,7 +274,6 @@ append_url (const char *link_uri, struct urlpos *newel; const char *base = ctx->base ? ctx->base : ctx->parent_base; struct url *url; - bool utf8_encode = false; if (!base) { @@ -293,7 +292,9 @@ append_url (const char *link_uri, return NULL; } - url = url_parse (link_uri, NULL, &utf8_encode); + set_ugly_no_encode (true); + url = url_parse (link_uri, NULL); + set_ugly_no_encode (false); if (!url) { DEBUGP (("%s: link \"%s\" doesn't parse.\n", @@ -312,7 +313,9 @@ append_url (const char *link_uri, DEBUGP (("%s: merge(\"%s\", \"%s\") -> %s\n", ctx->document_file, base, link_uri, complete_uri)); - url = url_parse (complete_uri, NULL, &utf8_encode); + set_ugly_no_encode (true); + url = url_parse (complete_uri, NULL); + set_ugly_no_encode (false); if (!url) { DEBUGP (("%s: merged link \"%s\" doesn't parse.\n", @@ -661,7 +664,6 @@ get_urls_file (const char *file) struct file_memory *fm; struct urlpos *head, *tail; const char *text, *text_end; - bool utf8_encode = false; /* Load the file. */ fm = read_file (file); @@ -713,7 +715,9 @@ get_urls_file (const char *file) url_text = merged; } - url = url_parse (url_text, &up_error_code, &utf8_encode); + set_ugly_no_encode (true); + url = url_parse (url_text, &up_error_code); + set_ugly_no_encode (false); if (!url) { logprintf (LOG_NOTQUIET, _("%s: Invalid URL %s: %s\n"), diff --git a/src/iri.c b/src/iri.c index 32eb7210..e5be2cf8 100644 --- a/src/iri.c +++ b/src/iri.c @@ -43,6 +43,8 @@ as that of the covered work. */ char *remote; char *current; +bool utf8_encode; +bool ugly_no_encode; static iconv_t locale2utf8; @@ -358,3 +360,28 @@ set_remote_as_current (void) remote = current ? xstrdup (current) : NULL; } +void reset_utf8_encode (void) +{ + set_utf8_encode (opt.enable_iri); +} + +void set_utf8_encode (bool encode) +{ + utf8_encode = encode; +} + +bool get_utf8_encode (void) +{ + return utf8_encode; +} + +void set_ugly_no_encode (bool ugly) +{ + ugly_no_encode = ugly; +} + +bool get_ugly_no_encode (void) +{ + return ugly_no_encode; +} + diff --git a/src/iri.h b/src/iri.h index 837dbfdd..413fb2f6 100644 --- a/src/iri.h +++ b/src/iri.h @@ -46,6 +46,13 @@ void set_current_charset (char *charset); void set_remote_charset (char *charset); void set_remote_as_current (void); bool remote_to_utf8 (const char *str, const char **new); +void reset_utf8_encode (void); +void set_utf8_encode (bool encode); +bool get_utf8_encode (void); + +/* ugly ugly ugly */ +void set_ugly_no_encode (bool ugly); +bool get_ugly_no_encode (void); #else /* ENABLE_IRI */ @@ -63,6 +70,9 @@ bool remote_to_utf8 (const char *str, const char **new); #define set_remote_charset(str) #define set_remote_as_current() #define remote_to_utf8(a,b) false +#define reset_utf8_encode() +#define set_utf8_encode(a) +#define get_utf8_encode() false #endif /* ENABLE_IRI */ #endif /* IRI_H */ diff --git a/src/main.c b/src/main.c index d0ff1d21..bf49bf89 100644 --- a/src/main.c +++ b/src/main.c @@ -1191,6 +1191,7 @@ WARNING: Can't reopen standard output in binary mode;\n\ int dt; set_current_as_locale (); + set_ugly_no_encode (false); if ((opt.recursive || opt.page_requisites) && (url_scheme (*t) != SCHEME_FTP || url_uses_proxy (*t))) diff --git a/src/recur.c b/src/recur.c index d8279c39..6f5da2ae 100644 --- a/src/recur.c +++ b/src/recur.c @@ -190,7 +190,6 @@ uerr_t retrieve_tree (const char *start_url) { uerr_t status = RETROK; - bool utf8_encode = false; /* The queue of URLs we need to load. */ struct url_queue *queue; @@ -200,8 +199,11 @@ retrieve_tree (const char *start_url) struct hash_table *blacklist; int up_error_code; - struct url *start_url_parsed = url_parse (start_url, &up_error_code, &utf8_encode); + struct url *start_url_parsed; + set_ugly_no_encode (true); + start_url_parsed= url_parse (start_url, &up_error_code); + set_ugly_no_encode (false); if (!start_url_parsed) { logprintf (LOG_NOTQUIET, "%s: %s.\n", start_url, @@ -338,7 +340,9 @@ retrieve_tree (const char *start_url) if (children) { struct urlpos *child = children; - struct url *url_parsed = url_parsed = url_parse (url, NULL, &utf8_encode); + set_ugly_no_encode (true); + struct url *url_parsed = url_parse (url, NULL); + set_ugly_no_encode (false); char *referer_url = url; bool strip_auth = (url_parsed != NULL && url_parsed->user != NULL); @@ -641,13 +645,14 @@ descend_redirect_p (const char *redirected, const char *original, int depth, struct url *orig_parsed, *new_parsed; struct urlpos *upos; bool success; - bool utf8_encode = false; - orig_parsed = url_parse (original, NULL, &utf8_encode); + set_ugly_no_encode (true); + orig_parsed = url_parse (original, NULL); assert (orig_parsed != NULL); - new_parsed = url_parse (redirected, NULL, &utf8_encode); + new_parsed = url_parse (redirected, NULL); assert (new_parsed != NULL); + set_ugly_no_encode (false); upos = xnew0 (struct urlpos); upos->url = new_parsed; diff --git a/src/retr.c b/src/retr.c index 02106081..dd4978a7 100644 --- a/src/retr.c +++ b/src/retr.c @@ -613,8 +613,6 @@ retrieve_url (const char *origurl, char **file, char **newloc, char *saved_post_data = NULL; char *saved_post_file_name = NULL; - bool utf8_encoded = opt.enable_iri; - /* If dt is NULL, use local storage. */ if (!dt) { @@ -627,8 +625,10 @@ retrieve_url (const char *origurl, char **file, char **newloc, if (file) *file = NULL; + reset_utf8_encode (); + second_try: - u = url_parse (url, &up_error_code, &utf8_encoded); + u = url_parse (url, &up_error_code); if (!u) { logprintf (LOG_NOTQUIET, "%s: %s.\n", url, url_error (up_error_code)); @@ -652,9 +652,10 @@ retrieve_url (const char *origurl, char **file, char **newloc, if (proxy) { /* sXXXav : support IRI for proxy */ - bool proxy_utf8_encode = false; /* Parse the proxy URL. */ - proxy_url = url_parse (proxy, &up_error_code, &proxy_utf8_encode); + set_ugly_no_encode (true); + proxy_url = url_parse (proxy, &up_error_code); + set_ugly_no_encode (false); if (!proxy_url) { logprintf (LOG_NOTQUIET, _("Error parsing proxy URL %s: %s.\n"), @@ -729,10 +730,10 @@ retrieve_url (const char *origurl, char **file, char **newloc, xfree (mynewloc); mynewloc = construced_newloc; - utf8_encoded = opt.enable_iri; + reset_utf8_encode (); /* Now, see if this new location makes sense. */ - newloc_parsed = url_parse (mynewloc, &up_error_code, &utf8_encoded); + newloc_parsed = url_parse (mynewloc, &up_error_code); if (!newloc_parsed) { logprintf (LOG_NOTQUIET, "%s: %s.\n", escnonprint_uri (mynewloc), @@ -780,9 +781,9 @@ retrieve_url (const char *origurl, char **file, char **newloc, } /* Try to not encode in UTF-8 if fetching failed */ - if (!(*dt & RETROKF) && utf8_encoded) + if (!(*dt & RETROKF) && get_utf8_encode ()) { - utf8_encoded = false; + set_utf8_encode (false); /*printf ("[Fallbacking to non-utf8 for `%s'\n", url);*/ goto second_try; } @@ -1036,8 +1037,11 @@ getproxy (struct url *u) bool url_uses_proxy (const char *url) { - bool ret, utf8_encode = false; - struct url *u = url_parse (url, NULL, &utf8_encode); + bool ret; + struct url *u; + set_ugly_no_encode(true); + u= url_parse (url, NULL); + set_ugly_no_encode(false); if (!u) return false; ret = getproxy (u) != NULL; diff --git a/src/url.c b/src/url.c index 32de9c75..c9489597 100644 --- a/src/url.c +++ b/src/url.c @@ -641,7 +641,7 @@ static const char *parse_errors[] = { error, and if ERROR is not NULL, also set *ERROR to the appropriate error code. */ struct url * -url_parse (const char *url, int *error, bool *utf8_encode) +url_parse (const char *url, int *error) { struct url *u; const char *p; @@ -671,12 +671,14 @@ url_parse (const char *url, int *error, bool *utf8_encode) goto error; } - if (opt.enable_iri && *utf8_encode) + if (opt.enable_iri && get_utf8_encode () && !get_ugly_no_encode ()) { const char *new; + bool utf8_encode; url_unescape ((char *) url); - *utf8_encode = remote_to_utf8 (url, &new); - if (*utf8_encode) + utf8_encode = remote_to_utf8 (url, &new); + set_utf8_encode (utf8_encode); + if (utf8_encode) url = new; } @@ -856,7 +858,7 @@ url_parse (const char *url, int *error, bool *utf8_encode) if (opt.enable_iri) { - char *new = idn_encode (u->host, *utf8_encode); + char *new = idn_encode (u->host, get_utf8_encode ()); if (new) { xfree (u->host); diff --git a/src/url.h b/src/url.h index a174568e..7c8bcfed 100644 --- a/src/url.h +++ b/src/url.h @@ -84,7 +84,7 @@ struct url char *url_escape (const char *); -struct url *url_parse (const char *, int *, bool *); +struct url *url_parse (const char *, int *); const char *url_error (int); char *url_full_path (const struct url *); void url_set_dir (struct url *, const char *);