mirror of
https://github.com/mirror/wget.git
synced 2025-03-25 09:10:13 +08:00
[svn] Squash embedded newlines in links.
This commit is contained in:
parent
1b2dce0493
commit
3f84a5e00e
@ -1,3 +1,8 @@
|
|||||||
|
2003-11-26 Hrvoje Niksic <hniksic@xemacs.org>
|
||||||
|
|
||||||
|
* html-parse.c (convert_and_copy): Remove embedded newlines when
|
||||||
|
AP_TRIM_BLANKS is specified.
|
||||||
|
|
||||||
2003-11-26 Hrvoje Niksic <hniksic@xemacs.org>
|
2003-11-26 Hrvoje Niksic <hniksic@xemacs.org>
|
||||||
|
|
||||||
* ftp.c: Set con->csock to -1 where rbuf_uninitialize was
|
* ftp.c: Set con->csock to -1 where rbuf_uninitialize was
|
||||||
|
@ -360,17 +360,16 @@ enum {
|
|||||||
the ASCII range when copying the string.
|
the ASCII range when copying the string.
|
||||||
|
|
||||||
* AP_TRIM_BLANKS -- ignore blanks at the beginning and at the end
|
* AP_TRIM_BLANKS -- ignore blanks at the beginning and at the end
|
||||||
of text. */
|
of text, as well as embedded newlines. */
|
||||||
|
|
||||||
static void
|
static void
|
||||||
convert_and_copy (struct pool *pool, const char *beg, const char *end, int flags)
|
convert_and_copy (struct pool *pool, const char *beg, const char *end, int flags)
|
||||||
{
|
{
|
||||||
int old_tail = pool->tail;
|
int old_tail = pool->tail;
|
||||||
int size;
|
|
||||||
|
|
||||||
/* First, skip blanks if required. We must do this before entities
|
/* Skip blanks if required. We must do this before entities are
|
||||||
are processed, so that blanks can still be inserted as, for
|
processed, so that blanks can still be inserted as, for instance,
|
||||||
instance, `&#32;'. */
|
`&#32;'. */
|
||||||
if (flags & AP_TRIM_BLANKS)
|
if (flags & AP_TRIM_BLANKS)
|
||||||
{
|
{
|
||||||
while (beg < end && ISSPACE (*beg))
|
while (beg < end && ISSPACE (*beg))
|
||||||
@ -378,7 +377,6 @@ convert_and_copy (struct pool *pool, const char *beg, const char *end, int flags
|
|||||||
while (end > beg && ISSPACE (end[-1]))
|
while (end > beg && ISSPACE (end[-1]))
|
||||||
--end;
|
--end;
|
||||||
}
|
}
|
||||||
size = end - beg;
|
|
||||||
|
|
||||||
if (flags & AP_DECODE_ENTITIES)
|
if (flags & AP_DECODE_ENTITIES)
|
||||||
{
|
{
|
||||||
@ -391,15 +389,14 @@ convert_and_copy (struct pool *pool, const char *beg, const char *end, int flags
|
|||||||
never lengthen it. */
|
never lengthen it. */
|
||||||
const char *from = beg;
|
const char *from = beg;
|
||||||
char *to;
|
char *to;
|
||||||
|
int squash_newlines = flags & AP_TRIM_BLANKS;
|
||||||
|
|
||||||
POOL_GROW (pool, end - beg);
|
POOL_GROW (pool, end - beg);
|
||||||
to = pool->contents + pool->tail;
|
to = pool->contents + pool->tail;
|
||||||
|
|
||||||
while (from < end)
|
while (from < end)
|
||||||
{
|
{
|
||||||
if (*from != '&')
|
if (*from == '&')
|
||||||
*to++ = *from++;
|
|
||||||
else
|
|
||||||
{
|
{
|
||||||
int entity = decode_entity (&from, end);
|
int entity = decode_entity (&from, end);
|
||||||
if (entity != -1)
|
if (entity != -1)
|
||||||
@ -407,6 +404,10 @@ convert_and_copy (struct pool *pool, const char *beg, const char *end, int flags
|
|||||||
else
|
else
|
||||||
*to++ = *from++;
|
*to++ = *from++;
|
||||||
}
|
}
|
||||||
|
else if ((*from == '\n' || *from == '\r') && squash_newlines)
|
||||||
|
++from;
|
||||||
|
else
|
||||||
|
*to++ = *from++;
|
||||||
}
|
}
|
||||||
/* Verify that we haven't exceeded the original size. (It
|
/* Verify that we haven't exceeded the original size. (It
|
||||||
shouldn't happen, hence the assert.) */
|
shouldn't happen, hence the assert.) */
|
||||||
|
@ -612,9 +612,12 @@ get_urls_html (const char *file, const char *url, int *meta_disallow_follow)
|
|||||||
init_interesting ();
|
init_interesting ();
|
||||||
|
|
||||||
/* Specify MHT_TRIM_VALUES because of buggy HTML generators that
|
/* Specify MHT_TRIM_VALUES because of buggy HTML generators that
|
||||||
generate <a href=" foo"> instead of <a href="foo"> (Netscape
|
generate <a href=" foo"> instead of <a href="foo"> (browsers
|
||||||
ignores spaces as well.) If you really mean space, use &32; or
|
ignore spaces as well.) If you really mean space, use &#32; or
|
||||||
%20. */
|
%20. MHT_TRIM_VALUES also causes squashing of embedded newlines,
|
||||||
|
e.g. in <img src="foo.[newline]html">. Such newlines are also
|
||||||
|
ignored by IE and Mozilla and are presumably introduced by
|
||||||
|
writing HTML with editors that force word wrap. */
|
||||||
flags = MHT_TRIM_VALUES;
|
flags = MHT_TRIM_VALUES;
|
||||||
if (opt.strict_comments)
|
if (opt.strict_comments)
|
||||||
flags |= MHT_STRICT_COMMENTS;
|
flags |= MHT_STRICT_COMMENTS;
|
||||||
|
Loading…
Reference in New Issue
Block a user