From 828906b6dcc85e2c7fa663435571ac73a97b52cc Mon Sep 17 00:00:00 2001 From: Paul Smith <psmith@gnu.org> Date: Wed, 10 Jan 2024 00:01:33 -0500 Subject: [PATCH] Create a common method for skipping variable references * README.git: Add some notes about using ASAN. * src/makeint.h: Declare skip_references(). * src/misc.c (skip_reference): A new function that will skip over a variable reference, counting matching open paren/brace characters. * src/implicit.c (get_next_word): Replace code with skip_reference(). * src/read.c (conditional_line): Ditto. (find_map_unquote): Ditto. (get_next_mword): Ditto. (parse_variable_definition): Ditto. * src/function.c (handle_function): Make clear that the passed in pointers are not modified if the function returns false. * src/expand.c (expand_string_buf): Don't create local variables to call handle_function() since it doesn't modify its arguments. * src/job.c (new_job): Small simplifications. --- README.git | 7 ++++ src/expand.c | 27 ++++++--------- src/function.c | 11 +++--- src/implicit.c | 33 +++--------------- src/job.c | 31 ++++++++--------- src/makeint.h | 1 + src/misc.c | 44 ++++++++++++++++++++++++ src/read.c | 92 ++++++-------------------------------------------- src/variable.c | 38 ++------------------- 9 files changed, 99 insertions(+), 185 deletions(-) diff --git a/README.git b/README.git index dc49e9d6..045f6414 100644 --- a/README.git +++ b/README.git @@ -209,6 +209,13 @@ work on non-GNU systems (Windows, MacOS, etc.) make clean make -j8 CFLAGS='-ggdb3 -fsanitize=address' LDFLAGS='-ggdb3 -fsanitize=address' + + Then to check for corruption only but not memory leaks run: + + ASAN_OPTIONS='detect_stack_after_use_return=true:detect_leaks=false' make check + + To check for leaks too run: + make check Note that ASAN is reporting many more errors than valgrind. I don't know diff --git a/src/expand.c b/src/expand.c index e840bc48..66c55e95 100644 --- a/src/expand.c +++ b/src/expand.c @@ -382,47 +382,40 @@ expand_string_buf (char *buf, const char *string, size_t length) { char openparen = *p; char closeparen = (openparen == '(') ? ')' : '}'; - const char *begp; const char *beg = p + 1; - char *op; char *abeg = NULL; const char *end, *colon; - op = o; - begp = p; - if (handle_function (&op, &begp)) - { - o = op; - p = begp; - break; - } + if (handle_function (&o, &p)) + break; /* Is there a variable reference inside the parens or braces? If so, expand it before expanding the entire reference. */ end = strchr (beg, closeparen); - if (end == 0) + if (end == NULL) /* Unterminated variable reference. */ O (fatal, *expanding_var, _("unterminated variable reference")); p1 = lindex (beg, end, '$'); - if (p1 != 0) + if (p1 != NULL) { /* BEG now points past the opening paren or brace. Count parens or braces until it is matched. */ - int count = 0; + int count = 1; for (p = beg; *p != '\0'; ++p) { if (*p == openparen) ++count; - else if (*p == closeparen && --count < 0) + else if (*p == closeparen && --count == 0) break; } - /* If COUNT is >= 0, there were unmatched opening parens + /* If COUNT is > 0, there were unmatched opening parens or braces, so we go to the simple case of a variable name such as '$($(a)'. */ - if (count < 0) + if (count == 0) { - abeg = expand_argument (beg, p); /* Expand the name. */ + /* Expand the name. */ + abeg = expand_argument (beg, p); beg = abeg; end = strchr (beg, '\0'); } diff --git a/src/function.c b/src/function.c index 5e7285a5..ab89d9de 100644 --- a/src/function.c +++ b/src/function.c @@ -2484,7 +2484,8 @@ expand_builtin_function (char *o, unsigned int argc, char **argv, /* Check for a function invocation in *STRINGP. *STRINGP points at the opening ( or { and is not null-terminated. If a function invocation is found, expand it into the buffer at *OP, updating *OP, incrementing - *STRINGP past the reference and returning nonzero. If not, return zero. */ + *STRINGP past the reference, and return nonzero. + If no function is found, return zero and don't change *OP or *STRINGP. */ int handle_function (char **op, const char **stringp) @@ -2512,10 +2513,10 @@ handle_function (char **op, const char **stringp) beg += entry_p->len; NEXT_TOKEN (beg); - /* Find the end of the function invocation, counting nested use of - whichever kind of parens we use. Since we're looking, count commas - to get a rough estimate of how many arguments we might have. The - count might be high, but it'll never be low. */ + /* Find the end of the function invocation, counting nested use of whichever + kind of parens we use. Don't use skip_reference so we can count commas + to get a rough estimate of how many arguments we might have. The count + might be high, but it'll never be low. */ for (nargs=1, end=beg; *end != '\0'; ++end) if (!STOP_SET (*end, MAP_VARSEP|MAP_COMMA)) diff --git a/src/implicit.c b/src/implicit.c index 6806e5cd..134e3fef 100644 --- a/src/implicit.c +++ b/src/implicit.c @@ -87,12 +87,10 @@ get_next_word (const char *buffer, size_t *length) return 0; - /* We already found the first value of "c", above. */ while (1) { - char closeparen; - int count; - + /* Each time through the loop, "c" has the current char + and "p" points to the next char. */ switch (c) { case '\0': @@ -101,31 +99,8 @@ get_next_word (const char *buffer, size_t *length) goto done_word; case '$': - c = *(p++); - if (c == '$') - break; - - /* This is a variable reference, so read it to the matching - close paren. */ - - if (c == '(') - closeparen = ')'; - else if (c == '{') - closeparen = '}'; - else - /* This is a single-letter variable reference. */ - break; - - for (count = 0; *p != '\0'; ++p) - { - if (*p == c) - ++count; - else if (*p == closeparen && --count < 0) - { - ++p; - break; - } - } + /* This is a variable reference, so skip it. */ + p = skip_reference (p); break; case '|': diff --git a/src/job.c b/src/job.c index dfd844dc..45e45c5c 100644 --- a/src/job.c +++ b/src/job.c @@ -1726,14 +1726,13 @@ new_job (struct file *file) *out++ = *in++; /* Copy OPENPAREN. */ outref = out; - /* IN now points past the opening paren or brace. - Count parens or braces until it is matched. */ + /* IN now points past the opening paren or brace. Count parens + or braces until it is matched. We don't use skip_reference + since we want to handle internal backslash/newlines. */ count = 0; while (*in != '\0') { - if (*in == closeparen && --count < 0) - break; - else if (*in == '\\' && in[1] == '\n') + if (*in == '\\' && in[1] == '\n') { /* We have found a backslash-newline inside a variable or function reference. Eat it and @@ -1744,11 +1743,11 @@ new_job (struct file *file) quoted = !quoted; if (quoted) - /* There were two or more backslashes, so this is - not really a continuation line. We don't collapse - the quoting backslashes here as is done in - collapse_continuations, because the line will - be collapsed again after expansion. */ + /* There were an even number of backslashes, so this + is not really a continuation line. We don't + collapse the quoting backslashes here as is done in + collapse_continuations, because the line will be + collapsed again after expansion. */ *out++ = *in++; else { @@ -1764,14 +1763,14 @@ new_job (struct file *file) /* Replace it all with a single space. */ *out++ = ' '; } + continue; } - else - { - if (*in == openparen) - ++count; + if (*in == closeparen && --count < 0) + break; + if (*in == openparen) + ++count; - *out++ = *in++; - } + *out++ = *in++; } } } diff --git a/src/makeint.h b/src/makeint.h index 00289b9c..869cf461 100644 --- a/src/makeint.h +++ b/src/makeint.h @@ -601,6 +601,7 @@ char *xstrndup (const char *, size_t); char *find_next_token (const char **, size_t *); char *next_token (const char *); char *end_of_token (const char *); +char *skip_reference (const char *); void collapse_continuations (char *); char *lindex (const char *, const char *, int); int alpha_compare (const void *, const void *); diff --git a/src/misc.c b/src/misc.c index 64309900..a53301df 100644 --- a/src/misc.c +++ b/src/misc.c @@ -418,6 +418,50 @@ next_token (const char *s) return (char *)s; } +/* This function returns P if P points to EOS, or P+1 if P is NOT an open + paren or brace, or a pointer to the character after the matching close + paren or brace, skipping matched internal parens or braces. + + It is typically called when we have seen a '$' in a string and we want to + treat it as a variable reference and find the end of it: in that case P + should point to the character after the '$'. */ + +char * +skip_reference (const char *p) +{ + char openparen = *p; + char closeparen; + int count = 1; + + if (openparen == '\0') + return (char*)p; + + if (openparen == '(') + closeparen = ')'; + else if (openparen == '{') + closeparen = '}'; + else + return (char*)(p+1); + + while (1) + { + ++p; + if (!STOP_SET (*p, MAP_NUL|MAP_VARSEP)) + continue; + if (*p == '\0') + break; + if (*p == openparen) + ++count; + else if (*p == closeparen && --count == 0) + { + ++p; + break; + } + } + + return (char*)p; +} + /* Find the next token in PTR; return the address of it, and store the length of the token into *LENGTHPTR if LENGTHPTR is not nil. Set *PTR to the end of the token, so this function can be called repeatedly in a loop. */ diff --git a/src/read.c b/src/read.c index e97d3061..c5a635f9 100644 --- a/src/read.c +++ b/src/read.c @@ -1669,33 +1669,10 @@ conditional_line (char *line, size_t len, const floc *flocp) s1 = ++line; /* Find the end of the first string. */ - if (termin == ',') - { - int count = 0; - char *delim = xmalloc (strlen (line)); - while (*line != '\0') - { - if (*line == '$') - { - ++line; - if (*line == '(') - delim[count++] = ')'; - else if (*line == '{') - delim[count++] = '}'; - } - else if (count == 0) - { - if (*line == ',') - break; - } - else if (*line == delim[count-1]) - --count; - ++line; - } - free (delim); - } - else - while (*line != '\0' && *line != termin) + while (*line != '\0' && *line != termin) + if (*line == '$') + line = skip_reference (line+1); + else ++line; if (*line == '\0') @@ -1703,7 +1680,7 @@ conditional_line (char *line, size_t len, const floc *flocp) if (termin == ',') { - /* Strip blanks after the first string. */ + /* Strip blanks before the comma. */ char *p = line++; while (ISBLANK (p[-1])) --p; @@ -2355,35 +2332,7 @@ find_map_unquote (char *string, int stopmap) /* If we stopped due to a variable reference, skip over its contents. */ if (*p == '$') { - char openparen = p[1]; - - /* Check if '$' is the last character in the string. */ - if (openparen == '\0') - break; - - p += 2; - - /* Skip the contents of a non-quoted, multi-char variable ref. */ - if (openparen == '(' || openparen == '{') - { - unsigned int pcount = 1; - char closeparen = (openparen == '(' ? ')' : '}'); - - while (*p) - { - if (*p == openparen) - ++pcount; - else if (*p == closeparen) - if (--pcount == 0) - { - ++p; - break; - } - ++p; - } - } - - /* Skipped the variable reference: look for STOPCHARS again. */ + p = skip_reference (p+1); continue; } @@ -2851,12 +2800,10 @@ get_next_mword (char *buffer, char **startp, size_t *length) adjust our assumptions then. */ wtype = w_static; - /* We already found the first value of "c", above. */ while (1) { - char closeparen; - int count; - + /* Each time through the loop, "c" has the current character + and "p" points to the next character. */ if (END_OF_TOKEN (c)) goto done_word; @@ -2883,28 +2830,9 @@ get_next_mword (char *buffer, char **startp, size_t *length) if (c == '\0') goto done_word; - /* This is a variable reference, so note that it's expandable. - Then read it to the matching close paren. */ + /* This is a variable reference: note that then skip it. */ wtype = w_variable; - - if (c == '(') - closeparen = ')'; - else if (c == '{') - closeparen = '}'; - else - /* This is a single-letter variable reference. */ - break; - - for (count=0; *p != '\0'; ++p) - { - if (*p == c) - ++count; - else if (*p == closeparen && --count < 0) - { - ++p; - break; - } - } + p = skip_reference (p-1); break; case '?': diff --git a/src/variable.c b/src/variable.c index 9d4aec42..8b63df2d 100644 --- a/src/variable.c +++ b/src/variable.c @@ -1732,7 +1732,7 @@ parse_variable_definition (const char *str, struct variable *var) if (!end) end = p - 1; - /* We need to distinguish :=, ::=, and :::=, and : outside of an + /* We need to distinguish :=, ::=, and :::=, versus : outside of an assignment (which means this is not a variable definition). */ c = *p++; if (c == '=') @@ -1789,41 +1789,7 @@ parse_variable_definition (const char *str, struct variable *var) return NULL; if (c == '$') - { - /* Skip any variable reference, to ensure we don't treat chars - inside the reference as assignment operators. */ - char closeparen; - unsigned int count; - - c = *p++; - switch (c) - { - case '(': - closeparen = ')'; - break; - case '{': - closeparen = '}'; - break; - case '\0': - return NULL; - default: - /* '$$' or '$X': skip it. */ - continue; - } - - /* P now points past the opening paren or brace. Count parens or - braces until we find the closing paren/brace. */ - for (count = 1; *p != '\0'; ++p) - { - if (*p == closeparen && --count == 0) - { - ++p; - break; - } - if (*p == c) - ++count; - } - } + p = skip_reference (p); } /* We found a valid variable assignment: END points to the char after the