diff --git a/src/ChangeLog b/src/ChangeLog index e050bbbd..0a18cd33 100644 --- a/src/ChangeLog +++ b/src/ChangeLog @@ -1,3 +1,19 @@ +2000-11-10 Hrvoje Niksic + + * init.c (run_wgetrc): Don't bother killing off '\r' since + parse_line() skips whitespace at end of line anyway. + (parse_line): Oops, it didn't. Now it does. + + * recur.c (parse_robots): Ditto here. + + * ftp-ls.c (ftp_parse_unix_ls): Kill off the newline character + manually because read_whole_line no longer does. + + * utils.c (read_whole_line): Rewrite to: a) use less memory + (reallocates to needed size after work), b) work faster --> + fgets() instead of getc, c) be more correct --> doesn't kill the + newline character at the end of line. + 2000-11-10 Hrvoje Niksic * init.c (comind): Initialize MAX to array size - 1. diff --git a/src/ftp-ls.c b/src/ftp-ls.c index 4fd87bab..16a7f7d6 100644 --- a/src/ftp-ls.c +++ b/src/ftp-ls.c @@ -95,7 +95,9 @@ ftp_parse_unix_ls (const char *file) { DEBUGP (("%s\n", line)); len = strlen (line); - /* Destroy <CR> if there is one. */ + /* Destroy <CR><LF> if present. */ + if (len && line[len - 1] == '\n') + line[--len] = '\0'; if (len && line[len - 1] == '\r') line[--len] = '\0'; diff --git a/src/init.c b/src/init.c index 38b065fa..b511c724 100644 --- a/src/init.c +++ b/src/init.c @@ -336,10 +336,7 @@ run_wgetrc (const char *file) { char *com, *val; int status; - int length = strlen (line); - if (length && line[length - 1] == '\r') - line[length - 1] = '\0'; /* Parse the line. */ status = parse_line (line, &com, &val); free (line); @@ -411,12 +408,12 @@ parse_line (const char *line, char **com, char **val) const char *orig_comptr, *end; char *new_comptr; - /* Skip spaces. */ - while (*p == ' ' || *p == '\t') + /* Skip whitespace. */ + while (*p && ISSPACE (*p)) ++p; /* Don't process empty lines. 
*/ - if (!*p || *p == '\n' || *p == '#') + if (!*p || *p == '#') return -1; for (orig_comptr = p; ISALPHA (*p) || *p == '_' || *p == '-'; p++) @@ -424,6 +421,8 @@ parse_line (const char *line, char **com, char **val) /* The next char should be space or '='. */ if (!ISSPACE (*p) && (*p != '=')) return 0; + /* Here we cannot use strdupdelim() as we normally would because we + want to skip the `-' and `_' characters in the input string. */ *com = (char *)xmalloc (p - orig_comptr + 1); for (new_comptr = *com; orig_comptr < p; orig_comptr++) { @@ -449,10 +448,12 @@ parse_line (const char *line, char **com, char **val) } /* Skip spaces after '='. */ for (++p; ISSPACE (*p); p++); - /* Get the ending position. */ - for (end = p; *end && *end != '\n'; end++); - /* Allocate *val, and copy from line. */ - *val = strdupdelim (p, end); + /* Get the ending position for VAL by starting with the end of the + line and skipping whitespace. */ + end = line + strlen (line) - 1; + while (end > p && ISSPACE (*end)) + --end; + *val = strdupdelim (p, end + 1); return 1; } diff --git a/src/recur.c b/src/recur.c index 4183fec9..52cc8e12 100644 --- a/src/recur.c +++ b/src/recur.c @@ -702,9 +702,11 @@ parse_robots (const char *robots_filename) while ((line = read_whole_line (fp))) { len = strlen (line); - /* Destroy <CR> if there is one. */ + /* Destroy <CR><LF> if present. */ + if (len && line[len - 1] == '\n') + line[--len] = '\0'; if (len && line[len - 1] == '\r') - line[len - 1] = '\0'; + line[--len] = '\0'; /* According to specifications, optional space may be at the end... */ DEBUGP (("Line: %s\n", line)); diff --git a/src/utils.c b/src/utils.c index 371afd6d..795ecb75 100644 --- a/src/utils.c +++ b/src/utils.c @@ -45,6 +45,7 @@ Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ #ifdef NeXT # include /* for access() */ #endif +#include <assert.h> #include "wget.h" #include "utils.h" @@ -696,37 +697,43 @@ suffix (const char *str) /* Read a line from FP. 
The function reallocs the storage as needed to accomodate for any length of the line. Reallocs are done storage exponentially, doubling the storage after each overflow to - minimize the number of calls to realloc(). + minimize the number of calls to realloc() and fgets(). The newline + character at the end of line is retained. + + After end-of-file is encountered without anything being read, NULL + is returned. NULL is also returned on error. To distinguish + between these two cases, use the stdio function ferror(). */ - It is not an exemplary of correctness, since it kills off the - newline (and no, there is no way to know if there was a newline at - EOF). */ char * read_whole_line (FILE *fp) { - char *line; - int i, bufsize, c; + int length = 0; + int bufsize = 81; + char *line = (char *)xmalloc (bufsize); - i = 0; - bufsize = 40; - line = (char *)xmalloc (bufsize); - /* Construct the line. */ - while ((c = getc (fp)) != EOF && c != '\n') + while (fgets (line + length, bufsize - length, fp)) { - if (i > bufsize - 1) - line = (char *)xrealloc (line, (bufsize <<= 1)); - line[i++] = c; + length += strlen (line + length); + assert (length > 0); + if (line[length - 1] == '\n') + break; + /* fgets() guarantees to read the whole line, or to use up the + space we've given it. We can double the buffer + unconditionally. */ + bufsize <<= 1; + line = xrealloc (line, bufsize); } - if (c == EOF && !i) + if (length == 0 || ferror (fp)) { free (line); return NULL; } - /* Check for overflow at zero-termination (no need to double the - buffer in this case. */ - if (i == bufsize) - line = (char *)xrealloc (line, i + 1); - line[i] = '\0'; + if (length + 1 < bufsize) + /* Relieve the memory from our exponential greediness. We say + `length + 1' because the terminating \0 is not included in + LENGTH. We don't need to zero-terminate the string ourselves, + though, because fgets() does that. */ + line = xrealloc (line, length + 1); return line; }