mirror of
https://github.com/mirror/wget.git
synced 2025-03-12 18:50:18 +08:00
Print message for no-follow attribute only if norobots respected
* src/html-url.c (get_urls_html_fm): Remove misleading log message.
* src/recur.c (retrieve_tree): Add log message into correct if block.
Commit e39be32838
added a message that
said links will not be followed whenever the nofollow attribute is found
in a page. It didn't take into account that with -e robots=off (and
equivalents) links will still be followed.
This bug has been noticed multiple times:
* https://www.reddit.com/r/DataHoarder/comments/mprq89/wget_respects_nofollow_attribute_despite_e/
* https://gist.github.com/simonw/27e810771137408fd7834ad153750c41#gistcomment-3648191
* https://superuser.com/questions/1494761/wget-wont-ignore-no-follow-attributes
This commit makes it so that this message is only printed when a
nofollow link is found and the norobots convention is respected.
Copyright-paperwork-exempt: Yes
This commit is contained in:
parent
11c626efc4
commit
f1cccd2c45
@ -837,9 +837,6 @@ get_urls_html_fm (const char *file, const struct file_memory *fm,
|
||||
#endif
|
||||
xfree (meta_charset);
|
||||
|
||||
if (ctx.nofollow) {
|
||||
logprintf(LOG_VERBOSE, _("no-follow attribute found in %s. Will not follow any links on this page\n"), file);
|
||||
}
|
||||
DEBUGP (("no-follow in %s: %d\n", file, ctx.nofollow));
|
||||
|
||||
if (meta_disallow_follow)
|
||||
|
@ -427,6 +427,7 @@ retrieve_tree (struct url *start_url_parsed, struct iri *pi)
|
||||
|
||||
if (opt.use_robots && meta_disallow_follow)
|
||||
{
|
||||
logprintf(LOG_VERBOSE, _("no-follow attribute found in %s. Will not follow any links on this page\n"), file);
|
||||
free_urlpos (children);
|
||||
children = NULL;
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user