mirror of
https://github.com/mirror/wget.git
synced 2025-01-01 07:50:11 +08:00
[svn] Better version of read_whole_line().
Published in <sxsr94jd7z4.fsf@florida.arsdigita.de>.
This commit is contained in:
parent
e18ca280fb
commit
e1f1c1ff40
@ -1,3 +1,19 @@
|
||||
2000-11-10 Hrvoje Niksic <hniksic@arsdigita.com>
|
||||
|
||||
* init.c (run_wgetrc): Don't bother killing off '\r' since
|
||||
parse_line() skips whitespace at end of line anyway.
|
||||
(parse_line): Oops, it didn't. Now it does.
|
||||
|
||||
* recur.c (parse_robots): Ditto here.
|
||||
|
||||
* ftp-ls.c (ftp_parse_unix_ls): Kill off the newline character
|
||||
manually because read_whole_line no longer does.
|
||||
|
||||
* utils.c (read_whole_line): Rewrite to: a) use less memory
|
||||
(reallocates to needed size after work), b) work faster -->
|
||||
fgets() instead of getc, c) be more correct --> doesn't kill the
|
||||
newline character at the end of line.
|
||||
|
||||
2000-11-10 Hrvoje Niksic <hniksic@arsdigita.com>
|
||||
|
||||
* init.c (comind): Initialize MAX to array size - 1.
|
||||
|
@ -95,7 +95,9 @@ ftp_parse_unix_ls (const char *file)
|
||||
{
|
||||
DEBUGP (("%s\n", line));
|
||||
len = strlen (line);
|
||||
/* Destroy <CR> if there is one. */
|
||||
/* Destroy <CR><LF> if present. */
|
||||
if (len && line[len - 1] == '\n')
|
||||
line[--len] = '\0';
|
||||
if (len && line[len - 1] == '\r')
|
||||
line[--len] = '\0';
|
||||
|
||||
|
21
src/init.c
21
src/init.c
@ -336,10 +336,7 @@ run_wgetrc (const char *file)
|
||||
{
|
||||
char *com, *val;
|
||||
int status;
|
||||
int length = strlen (line);
|
||||
|
||||
if (length && line[length - 1] == '\r')
|
||||
line[length - 1] = '\0';
|
||||
/* Parse the line. */
|
||||
status = parse_line (line, &com, &val);
|
||||
free (line);
|
||||
@ -411,12 +408,12 @@ parse_line (const char *line, char **com, char **val)
|
||||
const char *orig_comptr, *end;
|
||||
char *new_comptr;
|
||||
|
||||
/* Skip spaces. */
|
||||
while (*p == ' ' || *p == '\t')
|
||||
/* Skip whitespace. */
|
||||
while (*p && ISSPACE (*p))
|
||||
++p;
|
||||
|
||||
/* Don't process empty lines. */
|
||||
if (!*p || *p == '\n' || *p == '#')
|
||||
if (!*p || *p == '#')
|
||||
return -1;
|
||||
|
||||
for (orig_comptr = p; ISALPHA (*p) || *p == '_' || *p == '-'; p++)
|
||||
@ -424,6 +421,8 @@ parse_line (const char *line, char **com, char **val)
|
||||
/* The next char should be space or '='. */
|
||||
if (!ISSPACE (*p) && (*p != '='))
|
||||
return 0;
|
||||
/* Here we cannot use strdupdelim() as we normally would because we
|
||||
want to skip the `-' and `_' characters in the input string. */
|
||||
*com = (char *)xmalloc (p - orig_comptr + 1);
|
||||
for (new_comptr = *com; orig_comptr < p; orig_comptr++)
|
||||
{
|
||||
@ -449,10 +448,12 @@ parse_line (const char *line, char **com, char **val)
|
||||
}
|
||||
/* Skip spaces after '='. */
|
||||
for (++p; ISSPACE (*p); p++);
|
||||
/* Get the ending position. */
|
||||
for (end = p; *end && *end != '\n'; end++);
|
||||
/* Allocate *val, and copy from line. */
|
||||
*val = strdupdelim (p, end);
|
||||
/* Get the ending position for VAL by starting with the end of the
|
||||
line and skipping whitespace. */
|
||||
end = line + strlen (line) - 1;
|
||||
while (end > p && ISSPACE (*end))
|
||||
--end;
|
||||
*val = strdupdelim (p, end + 1);
|
||||
return 1;
|
||||
}
|
||||
|
||||
|
@ -702,9 +702,11 @@ parse_robots (const char *robots_filename)
|
||||
while ((line = read_whole_line (fp)))
|
||||
{
|
||||
len = strlen (line);
|
||||
/* Destroy <CR> if there is one. */
|
||||
/* Destroy <CR><LF> if present. */
|
||||
if (len && line[len - 1] == '\n')
|
||||
line[--len] = '\0';
|
||||
if (len && line[len - 1] == '\r')
|
||||
line[len - 1] = '\0';
|
||||
line[--len] = '\0';
|
||||
/* According to specifications, optional space may be at the
|
||||
end... */
|
||||
DEBUGP (("Line: %s\n", line));
|
||||
|
47
src/utils.c
47
src/utils.c
@ -45,6 +45,7 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
|
||||
#ifdef NeXT
|
||||
# include <libc.h> /* for access() */
|
||||
#endif
|
||||
#include <assert.h>
|
||||
|
||||
#include "wget.h"
|
||||
#include "utils.h"
|
||||
@ -696,37 +697,43 @@ suffix (const char *str)
|
||||
/* Read a line from FP. The function reallocs the storage as needed
|
||||
to accommodate any length of the line. Reallocs are done
|
||||
exponentially, doubling the storage after each overflow to
|
||||
minimize the number of calls to realloc().
|
||||
minimize the number of calls to realloc() and fgets(). The newline
|
||||
character at the end of line is retained.
|
||||
|
||||
After end-of-file is encountered without anything being read, NULL
|
||||
is returned. NULL is also returned on error. To distinguish
|
||||
between these two cases, use the stdio function ferror(). */
|
||||
|
||||
It is not an exemplar of correctness, since it kills off the
|
||||
newline (and no, there is no way to know if there was a newline at
|
||||
EOF). */
|
||||
char *
|
||||
read_whole_line (FILE *fp)
|
||||
{
|
||||
char *line;
|
||||
int i, bufsize, c;
|
||||
int length = 0;
|
||||
int bufsize = 81;
|
||||
char *line = (char *)xmalloc (bufsize);
|
||||
|
||||
i = 0;
|
||||
bufsize = 40;
|
||||
line = (char *)xmalloc (bufsize);
|
||||
/* Construct the line. */
|
||||
while ((c = getc (fp)) != EOF && c != '\n')
|
||||
while (fgets (line + length, bufsize - length, fp))
|
||||
{
|
||||
if (i > bufsize - 1)
|
||||
line = (char *)xrealloc (line, (bufsize <<= 1));
|
||||
line[i++] = c;
|
||||
length += strlen (line + length);
|
||||
assert (length > 0);
|
||||
if (line[length - 1] == '\n')
|
||||
break;
|
||||
/* fgets() guarantees to read the whole line, or to use up the
|
||||
space we've given it. We can double the buffer
|
||||
unconditionally. */
|
||||
bufsize <<= 1;
|
||||
line = xrealloc (line, bufsize);
|
||||
}
|
||||
if (c == EOF && !i)
|
||||
if (length == 0 || ferror (fp))
|
||||
{
|
||||
free (line);
|
||||
return NULL;
|
||||
}
|
||||
/* Check for overflow at zero-termination (no need to double the
|
||||
buffer in this case). */
|
||||
if (i == bufsize)
|
||||
line = (char *)xrealloc (line, i + 1);
|
||||
line[i] = '\0';
|
||||
if (length + 1 < bufsize)
|
||||
/* Relieve the memory from our exponential greediness. We say
|
||||
`length + 1' because the terminating \0 is not included in
|
||||
LENGTH. We don't need to zero-terminate the string ourselves,
|
||||
though, because fgets() does that. */
|
||||
line = xrealloc (line, length + 1);
|
||||
return line;
|
||||
}
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user