Create a common method for skipping variable references

* README.git: Add some notes about using ASAN.
* src/makeint.h: Declare skip_reference().
* src/misc.c (skip_reference): A new function that will skip over a
variable reference, counting matching open paren/brace characters.
* src/implicit.c (get_next_word): Replace code with skip_reference().
* src/read.c (conditional_line): Ditto.
(find_map_unquote): Ditto.
(get_next_mword): Ditto.
(parse_variable_definition): Ditto.
* src/function.c (handle_function): Make clear that the passed in
pointers are not modified if the function returns false.
* src/expand.c (expand_string_buf): Don't create local variables to
call handle_function() since it doesn't modify its arguments.
* src/job.c (new_job): Small simplifications.
This commit is contained in:
Paul Smith 2024-01-10 00:01:33 -05:00
parent b936970956
commit 828906b6dc
9 changed files with 99 additions and 185 deletions

View File

@ -209,6 +209,13 @@ work on non-GNU systems (Windows, MacOS, etc.)
make clean make clean
make -j8 CFLAGS='-ggdb3 -fsanitize=address' LDFLAGS='-ggdb3 -fsanitize=address' make -j8 CFLAGS='-ggdb3 -fsanitize=address' LDFLAGS='-ggdb3 -fsanitize=address'
Then to check for corruption only but not memory leaks run:
ASAN_OPTIONS='detect_stack_use_after_return=true:detect_leaks=false' make check
To check for leaks too run:
make check make check
Note that ASAN is reporting many more errors than valgrind. I don't know Note that ASAN is reporting many more errors than valgrind. I don't know

View File

@ -382,47 +382,40 @@ expand_string_buf (char *buf, const char *string, size_t length)
{ {
char openparen = *p; char openparen = *p;
char closeparen = (openparen == '(') ? ')' : '}'; char closeparen = (openparen == '(') ? ')' : '}';
const char *begp;
const char *beg = p + 1; const char *beg = p + 1;
char *op;
char *abeg = NULL; char *abeg = NULL;
const char *end, *colon; const char *end, *colon;
op = o; if (handle_function (&o, &p))
begp = p; break;
if (handle_function (&op, &begp))
{
o = op;
p = begp;
break;
}
/* Is there a variable reference inside the parens or braces? /* Is there a variable reference inside the parens or braces?
If so, expand it before expanding the entire reference. */ If so, expand it before expanding the entire reference. */
end = strchr (beg, closeparen); end = strchr (beg, closeparen);
if (end == 0) if (end == NULL)
/* Unterminated variable reference. */ /* Unterminated variable reference. */
O (fatal, *expanding_var, _("unterminated variable reference")); O (fatal, *expanding_var, _("unterminated variable reference"));
p1 = lindex (beg, end, '$'); p1 = lindex (beg, end, '$');
if (p1 != 0) if (p1 != NULL)
{ {
/* BEG now points past the opening paren or brace. /* BEG now points past the opening paren or brace.
Count parens or braces until it is matched. */ Count parens or braces until it is matched. */
int count = 0; int count = 1;
for (p = beg; *p != '\0'; ++p) for (p = beg; *p != '\0'; ++p)
{ {
if (*p == openparen) if (*p == openparen)
++count; ++count;
else if (*p == closeparen && --count < 0) else if (*p == closeparen && --count == 0)
break; break;
} }
/* If COUNT is >= 0, there were unmatched opening parens /* If COUNT is > 0, there were unmatched opening parens
or braces, so we go to the simple case of a variable name or braces, so we go to the simple case of a variable name
such as '$($(a)'. */ such as '$($(a)'. */
if (count < 0) if (count == 0)
{ {
abeg = expand_argument (beg, p); /* Expand the name. */ /* Expand the name. */
abeg = expand_argument (beg, p);
beg = abeg; beg = abeg;
end = strchr (beg, '\0'); end = strchr (beg, '\0');
} }

View File

@ -2484,7 +2484,8 @@ expand_builtin_function (char *o, unsigned int argc, char **argv,
/* Check for a function invocation in *STRINGP. *STRINGP points at the /* Check for a function invocation in *STRINGP. *STRINGP points at the
opening ( or { and is not null-terminated. If a function invocation opening ( or { and is not null-terminated. If a function invocation
is found, expand it into the buffer at *OP, updating *OP, incrementing is found, expand it into the buffer at *OP, updating *OP, incrementing
*STRINGP past the reference and returning nonzero. If not, return zero. */ *STRINGP past the reference, and return nonzero.
If no function is found, return zero and don't change *OP or *STRINGP. */
int int
handle_function (char **op, const char **stringp) handle_function (char **op, const char **stringp)
@ -2512,10 +2513,10 @@ handle_function (char **op, const char **stringp)
beg += entry_p->len; beg += entry_p->len;
NEXT_TOKEN (beg); NEXT_TOKEN (beg);
/* Find the end of the function invocation, counting nested use of /* Find the end of the function invocation, counting nested use of whichever
whichever kind of parens we use. Since we're looking, count commas kind of parens we use. Don't use skip_reference so we can count commas
to get a rough estimate of how many arguments we might have. The to get a rough estimate of how many arguments we might have. The count
count might be high, but it'll never be low. */ might be high, but it'll never be low. */
for (nargs=1, end=beg; *end != '\0'; ++end) for (nargs=1, end=beg; *end != '\0'; ++end)
if (!STOP_SET (*end, MAP_VARSEP|MAP_COMMA)) if (!STOP_SET (*end, MAP_VARSEP|MAP_COMMA))

View File

@ -87,12 +87,10 @@ get_next_word (const char *buffer, size_t *length)
return 0; return 0;
/* We already found the first value of "c", above. */
while (1) while (1)
{ {
char closeparen; /* Each time through the loop, "c" has the current char
int count; and "p" points to the next char. */
switch (c) switch (c)
{ {
case '\0': case '\0':
@ -101,31 +99,8 @@ get_next_word (const char *buffer, size_t *length)
goto done_word; goto done_word;
case '$': case '$':
c = *(p++); /* This is a variable reference, so skip it. */
if (c == '$') p = skip_reference (p);
break;
/* This is a variable reference, so read it to the matching
close paren. */
if (c == '(')
closeparen = ')';
else if (c == '{')
closeparen = '}';
else
/* This is a single-letter variable reference. */
break;
for (count = 0; *p != '\0'; ++p)
{
if (*p == c)
++count;
else if (*p == closeparen && --count < 0)
{
++p;
break;
}
}
break; break;
case '|': case '|':

View File

@ -1726,14 +1726,13 @@ new_job (struct file *file)
*out++ = *in++; /* Copy OPENPAREN. */ *out++ = *in++; /* Copy OPENPAREN. */
outref = out; outref = out;
/* IN now points past the opening paren or brace. /* IN now points past the opening paren or brace. Count parens
Count parens or braces until it is matched. */ or braces until it is matched. We don't use skip_reference
since we want to handle internal backslash/newlines. */
count = 0; count = 0;
while (*in != '\0') while (*in != '\0')
{ {
if (*in == closeparen && --count < 0) if (*in == '\\' && in[1] == '\n')
break;
else if (*in == '\\' && in[1] == '\n')
{ {
/* We have found a backslash-newline inside a /* We have found a backslash-newline inside a
variable or function reference. Eat it and variable or function reference. Eat it and
@ -1744,11 +1743,11 @@ new_job (struct file *file)
quoted = !quoted; quoted = !quoted;
if (quoted) if (quoted)
/* There were two or more backslashes, so this is /* There were an even number of backslashes, so this
not really a continuation line. We don't collapse is not really a continuation line. We don't
the quoting backslashes here as is done in collapse the quoting backslashes here as is done in
collapse_continuations, because the line will collapse_continuations, because the line will be
be collapsed again after expansion. */ collapsed again after expansion. */
*out++ = *in++; *out++ = *in++;
else else
{ {
@ -1764,14 +1763,14 @@ new_job (struct file *file)
/* Replace it all with a single space. */ /* Replace it all with a single space. */
*out++ = ' '; *out++ = ' ';
} }
continue;
} }
else if (*in == closeparen && --count < 0)
{ break;
if (*in == openparen) if (*in == openparen)
++count; ++count;
*out++ = *in++; *out++ = *in++;
}
} }
} }
} }

View File

@ -601,6 +601,7 @@ char *xstrndup (const char *, size_t);
char *find_next_token (const char **, size_t *); char *find_next_token (const char **, size_t *);
char *next_token (const char *); char *next_token (const char *);
char *end_of_token (const char *); char *end_of_token (const char *);
char *skip_reference (const char *);
void collapse_continuations (char *); void collapse_continuations (char *);
char *lindex (const char *, const char *, int); char *lindex (const char *, const char *, int);
int alpha_compare (const void *, const void *); int alpha_compare (const void *, const void *);

View File

@ -418,6 +418,50 @@ next_token (const char *s)
return (char *)s; return (char *)s;
} }
/* Skip over a variable reference.  P should point at the character that
   follows the '$' introducing the reference.

   The return value is:
     - P itself, when P is already at the end of the string;
     - P+1, when *P is neither '(' nor '{' (the reference is one character);
     - otherwise, a pointer to the character just past the close paren or
       brace that matches the opener at P.  Nested openers of the same kind
       are balanced against their closers.  If the string ends before the
       opener is matched, a pointer to the terminating nul is returned.  */

char *
skip_reference (const char *p)
{
  const char opener = *p;
  char closer;
  int depth = 1;    /* The opener at P is the first nesting level.  */

  switch (opener)
    {
    case '\0':
      return (char *) p;
    case '(':
      closer = ')';
      break;
    case '{':
      closer = '}';
      break;
    default:
      return (char *) (p + 1);
    }

  for (;;)
    {
      ++p;

      /* Only characters flagged MAP_NUL or MAP_VARSEP in the stop map
         need a closer look; step over everything else.  */
      if (STOP_SET (*p, MAP_NUL|MAP_VARSEP))
        {
          if (*p == '\0')
            /* Unterminated reference: stop at the end of the string.  */
            return (char *) p;

          if (*p == opener)
            ++depth;
          else if (*p == closer && --depth == 0)
            /* Matched the original opener: resume after the closer.  */
            return (char *) (p + 1);
        }
    }
}
/* Find the next token in PTR; return the address of it, and store the length /* Find the next token in PTR; return the address of it, and store the length
of the token into *LENGTHPTR if LENGTHPTR is not nil. Set *PTR to the end of the token into *LENGTHPTR if LENGTHPTR is not nil. Set *PTR to the end
of the token, so this function can be called repeatedly in a loop. */ of the token, so this function can be called repeatedly in a loop. */

View File

@ -1669,33 +1669,10 @@ conditional_line (char *line, size_t len, const floc *flocp)
s1 = ++line; s1 = ++line;
/* Find the end of the first string. */ /* Find the end of the first string. */
if (termin == ',') while (*line != '\0' && *line != termin)
{ if (*line == '$')
int count = 0; line = skip_reference (line+1);
char *delim = xmalloc (strlen (line)); else
while (*line != '\0')
{
if (*line == '$')
{
++line;
if (*line == '(')
delim[count++] = ')';
else if (*line == '{')
delim[count++] = '}';
}
else if (count == 0)
{
if (*line == ',')
break;
}
else if (*line == delim[count-1])
--count;
++line;
}
free (delim);
}
else
while (*line != '\0' && *line != termin)
++line; ++line;
if (*line == '\0') if (*line == '\0')
@ -1703,7 +1680,7 @@ conditional_line (char *line, size_t len, const floc *flocp)
if (termin == ',') if (termin == ',')
{ {
/* Strip blanks after the first string. */ /* Strip blanks before the comma. */
char *p = line++; char *p = line++;
while (ISBLANK (p[-1])) while (ISBLANK (p[-1]))
--p; --p;
@ -2355,35 +2332,7 @@ find_map_unquote (char *string, int stopmap)
/* If we stopped due to a variable reference, skip over its contents. */ /* If we stopped due to a variable reference, skip over its contents. */
if (*p == '$') if (*p == '$')
{ {
char openparen = p[1]; p = skip_reference (p+1);
/* Check if '$' is the last character in the string. */
if (openparen == '\0')
break;
p += 2;
/* Skip the contents of a non-quoted, multi-char variable ref. */
if (openparen == '(' || openparen == '{')
{
unsigned int pcount = 1;
char closeparen = (openparen == '(' ? ')' : '}');
while (*p)
{
if (*p == openparen)
++pcount;
else if (*p == closeparen)
if (--pcount == 0)
{
++p;
break;
}
++p;
}
}
/* Skipped the variable reference: look for STOPCHARS again. */
continue; continue;
} }
@ -2851,12 +2800,10 @@ get_next_mword (char *buffer, char **startp, size_t *length)
adjust our assumptions then. */ adjust our assumptions then. */
wtype = w_static; wtype = w_static;
/* We already found the first value of "c", above. */
while (1) while (1)
{ {
char closeparen; /* Each time through the loop, "c" has the current character
int count; and "p" points to the next character. */
if (END_OF_TOKEN (c)) if (END_OF_TOKEN (c))
goto done_word; goto done_word;
@ -2883,28 +2830,9 @@ get_next_mword (char *buffer, char **startp, size_t *length)
if (c == '\0') if (c == '\0')
goto done_word; goto done_word;
/* This is a variable reference, so note that it's expandable. /* This is a variable reference: note that then skip it. */
Then read it to the matching close paren. */
wtype = w_variable; wtype = w_variable;
p = skip_reference (p-1);
if (c == '(')
closeparen = ')';
else if (c == '{')
closeparen = '}';
else
/* This is a single-letter variable reference. */
break;
for (count=0; *p != '\0'; ++p)
{
if (*p == c)
++count;
else if (*p == closeparen && --count < 0)
{
++p;
break;
}
}
break; break;
case '?': case '?':

View File

@ -1732,7 +1732,7 @@ parse_variable_definition (const char *str, struct variable *var)
if (!end) if (!end)
end = p - 1; end = p - 1;
/* We need to distinguish :=, ::=, and :::=, and : outside of an /* We need to distinguish :=, ::=, and :::=, versus : outside of an
assignment (which means this is not a variable definition). */ assignment (which means this is not a variable definition). */
c = *p++; c = *p++;
if (c == '=') if (c == '=')
@ -1789,41 +1789,7 @@ parse_variable_definition (const char *str, struct variable *var)
return NULL; return NULL;
if (c == '$') if (c == '$')
{ p = skip_reference (p);
/* Skip any variable reference, to ensure we don't treat chars
inside the reference as assignment operators. */
char closeparen;
unsigned int count;
c = *p++;
switch (c)
{
case '(':
closeparen = ')';
break;
case '{':
closeparen = '}';
break;
case '\0':
return NULL;
default:
/* '$$' or '$X': skip it. */
continue;
}
/* P now points past the opening paren or brace. Count parens or
braces until we find the closing paren/brace. */
for (count = 1; *p != '\0'; ++p)
{
if (*p == closeparen && --count == 0)
{
++p;
break;
}
if (*p == c)
++count;
}
}
} }
/* We found a valid variable assignment: END points to the char after the /* We found a valid variable assignment: END points to the char after the