[svn] Better version of read_whole_line().

Published in <sxsr94jd7z4.fsf@florida.arsdigita.de>.
This commit is contained in:
hniksic 2000-11-10 10:01:35 -08:00
parent e18ca280fb
commit e1f1c1ff40
5 changed files with 61 additions and 33 deletions

View File

@ -1,3 +1,19 @@
2000-11-10 Hrvoje Niksic <hniksic@arsdigita.com>
* init.c (run_wgetrc): Don't bother killing off '\r' since
parse_line() skips whitespace at end of line anyway.
(parse_line): Oops, it didn't. Now it does.
* recur.c (parse_robots): Ditto here.
* ftp-ls.c (ftp_parse_unix_ls): Kill off the newline character
manually because read_whole_line no longer does.
* utils.c (read_whole_line): Rewrite to: a) use less memory
(reallocates to needed size after work), b) work faster -->
fgets() instead of getc, c) be more correct --> doesn't kill the
newline character at the end of line.
2000-11-10 Hrvoje Niksic <hniksic@arsdigita.com>
* init.c (comind): Initialize MAX to array size - 1.

View File

@ -95,7 +95,9 @@ ftp_parse_unix_ls (const char *file)
{
DEBUGP (("%s\n", line));
len = strlen (line);
/* Destroy <CR> if there is one. */
/* Destroy <CR><LF> if present. */
if (len && line[len - 1] == '\n')
line[--len] = '\0';
if (len && line[len - 1] == '\r')
line[--len] = '\0';

View File

@ -336,10 +336,7 @@ run_wgetrc (const char *file)
{
char *com, *val;
int status;
int length = strlen (line);
if (length && line[length - 1] == '\r')
line[length - 1] = '\0';
/* Parse the line. */
status = parse_line (line, &com, &val);
free (line);
@ -411,12 +408,12 @@ parse_line (const char *line, char **com, char **val)
const char *orig_comptr, *end;
char *new_comptr;
/* Skip spaces. */
while (*p == ' ' || *p == '\t')
/* Skip whitespace. */
while (*p && ISSPACE (*p))
++p;
/* Don't process empty lines. */
if (!*p || *p == '\n' || *p == '#')
if (!*p || *p == '#')
return -1;
for (orig_comptr = p; ISALPHA (*p) || *p == '_' || *p == '-'; p++)
@ -424,6 +421,8 @@ parse_line (const char *line, char **com, char **val)
/* The next char should be space or '='. */
if (!ISSPACE (*p) && (*p != '='))
return 0;
/* Here we cannot use strdupdelim() as we normally would because we
want to skip the `-' and `_' characters in the input string. */
*com = (char *)xmalloc (p - orig_comptr + 1);
for (new_comptr = *com; orig_comptr < p; orig_comptr++)
{
@ -449,10 +448,12 @@ parse_line (const char *line, char **com, char **val)
}
/* Skip spaces after '='. */
for (++p; ISSPACE (*p); p++);
/* Get the ending position. */
for (end = p; *end && *end != '\n'; end++);
/* Allocate *val, and copy from line. */
*val = strdupdelim (p, end);
/* Get the ending position for VAL by starting with the end of the
line and skipping whitespace. */
end = line + strlen (line) - 1;
while (end > p && ISSPACE (*end))
--end;
*val = strdupdelim (p, end + 1);
return 1;
}

View File

@ -702,9 +702,11 @@ parse_robots (const char *robots_filename)
while ((line = read_whole_line (fp)))
{
len = strlen (line);
/* Destroy <CR> if there is one. */
/* Destroy <CR><LF> if present. */
if (len && line[len - 1] == '\n')
line[--len] = '\0';
if (len && line[len - 1] == '\r')
line[len - 1] = '\0';
line[--len] = '\0';
/* According to specifications, optional space may be at the
end... */
DEBUGP (("Line: %s\n", line));

View File

@ -45,6 +45,7 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
#ifdef NeXT
# include <libc.h> /* for access() */
#endif
#include <assert.h>
#include "wget.h"
#include "utils.h"
@ -696,37 +697,43 @@ suffix (const char *str)
/* Read a line from FP. The function reallocs the storage as needed
to accommodate any length of the line. Reallocs are done
exponentially, doubling the storage after each overflow to
minimize the number of calls to realloc().
minimize the number of calls to realloc() and fgets(). The newline
character at the end of line is retained.
After end-of-file is encountered without anything being read, NULL
is returned. NULL is also returned on error. To distinguish
between these two cases, use the stdio function ferror(). */
It is not an exemplar of correctness, since it kills off the
newline (and no, there is no way to know if there was a newline at
EOF). */
char *
read_whole_line (FILE *fp)
{
char *line;
int i, bufsize, c;
int length = 0;
int bufsize = 81;
char *line = (char *)xmalloc (bufsize);
i = 0;
bufsize = 40;
line = (char *)xmalloc (bufsize);
/* Construct the line. */
while ((c = getc (fp)) != EOF && c != '\n')
while (fgets (line + length, bufsize - length, fp))
{
if (i > bufsize - 1)
line = (char *)xrealloc (line, (bufsize <<= 1));
line[i++] = c;
length += strlen (line + length);
assert (length > 0);
if (line[length - 1] == '\n')
break;
/* fgets() guarantees to read the whole line, or to use up the
space we've given it. We can double the buffer
unconditionally. */
bufsize <<= 1;
line = xrealloc (line, bufsize);
}
if (c == EOF && !i)
if (length == 0 || ferror (fp))
{
free (line);
return NULL;
}
/* Check for overflow at zero-termination (no need to double the
buffer in this case). */
if (i == bufsize)
line = (char *)xrealloc (line, i + 1);
line[i] = '\0';
if (length + 1 < bufsize)
/* Relieve the memory from our exponential greediness. We say
`length + 1' because the terminating \0 is not included in
LENGTH. We don't need to zero-terminate the string ourselves,
though, because fgets() does that. */
line = xrealloc (line, length + 1);
return line;
}