Create a common method for skipping variable references

* README.git: Add some notes about using ASAN.
* src/makeint.h: Declare skip_reference().
* src/misc.c (skip_reference): A new function that will skip over a
variable reference, counting matching open paren/brace characters.
* src/implicit.c (get_next_word): Replace code with skip_reference().
* src/read.c (conditional_line): Ditto.
(find_map_unquote): Ditto.
(get_next_mword): Ditto.
(parse_variable_definition): Ditto.
* src/function.c (handle_function): Make clear that the passed in
pointers are not modified if the function returns false.
* src/expand.c (expand_string_buf): Don't create local variables to
call handle_function() since it doesn't modify its arguments.
* src/job.c (new_job): Small simplifications.
This commit is contained in:
Paul Smith 2024-01-10 00:01:33 -05:00
parent b936970956
commit 828906b6dc
9 changed files with 99 additions and 185 deletions

View File

@ -209,6 +209,13 @@ work on non-GNU systems (Windows, MacOS, etc.)
make clean
make -j8 CFLAGS='-ggdb3 -fsanitize=address' LDFLAGS='-ggdb3 -fsanitize=address'
Then to check for corruption only but not memory leaks run:
ASAN_OPTIONS='detect_stack_use_after_return=true:detect_leaks=false' make check
To check for leaks too run:
make check
Note that ASAN is reporting many more errors than valgrind. I don't know

View File

@ -382,47 +382,40 @@ expand_string_buf (char *buf, const char *string, size_t length)
{
char openparen = *p;
char closeparen = (openparen == '(') ? ')' : '}';
const char *begp;
const char *beg = p + 1;
char *op;
char *abeg = NULL;
const char *end, *colon;
op = o;
begp = p;
if (handle_function (&op, &begp))
{
o = op;
p = begp;
break;
}
if (handle_function (&o, &p))
break;
/* Is there a variable reference inside the parens or braces?
If so, expand it before expanding the entire reference. */
end = strchr (beg, closeparen);
if (end == 0)
if (end == NULL)
/* Unterminated variable reference. */
O (fatal, *expanding_var, _("unterminated variable reference"));
p1 = lindex (beg, end, '$');
if (p1 != 0)
if (p1 != NULL)
{
/* BEG now points past the opening paren or brace.
Count parens or braces until it is matched. */
int count = 0;
int count = 1;
for (p = beg; *p != '\0'; ++p)
{
if (*p == openparen)
++count;
else if (*p == closeparen && --count < 0)
else if (*p == closeparen && --count == 0)
break;
}
/* If COUNT is >= 0, there were unmatched opening parens
/* If COUNT is > 0, there were unmatched opening parens
or braces, so we go to the simple case of a variable name
such as '$($(a)'. */
if (count < 0)
if (count == 0)
{
abeg = expand_argument (beg, p); /* Expand the name. */
/* Expand the name. */
abeg = expand_argument (beg, p);
beg = abeg;
end = strchr (beg, '\0');
}

View File

@ -2484,7 +2484,8 @@ expand_builtin_function (char *o, unsigned int argc, char **argv,
/* Check for a function invocation in *STRINGP. *STRINGP points at the
opening ( or { and is not null-terminated. If a function invocation
is found, expand it into the buffer at *OP, updating *OP, incrementing
*STRINGP past the reference and returning nonzero. If not, return zero. */
*STRINGP past the reference, and return nonzero.
If no function is found, return zero and don't change *OP or *STRINGP. */
int
handle_function (char **op, const char **stringp)
@ -2512,10 +2513,10 @@ handle_function (char **op, const char **stringp)
beg += entry_p->len;
NEXT_TOKEN (beg);
/* Find the end of the function invocation, counting nested use of
whichever kind of parens we use. Since we're looking, count commas
to get a rough estimate of how many arguments we might have. The
count might be high, but it'll never be low. */
/* Find the end of the function invocation, counting nested use of whichever
kind of parens we use. Don't use skip_reference so we can count commas
to get a rough estimate of how many arguments we might have. The count
might be high, but it'll never be low. */
for (nargs=1, end=beg; *end != '\0'; ++end)
if (!STOP_SET (*end, MAP_VARSEP|MAP_COMMA))

View File

@ -87,12 +87,10 @@ get_next_word (const char *buffer, size_t *length)
return 0;
/* We already found the first value of "c", above. */
while (1)
{
char closeparen;
int count;
/* Each time through the loop, "c" has the current char
and "p" points to the next char. */
switch (c)
{
case '\0':
@ -101,31 +99,8 @@ get_next_word (const char *buffer, size_t *length)
goto done_word;
case '$':
c = *(p++);
if (c == '$')
break;
/* This is a variable reference, so read it to the matching
close paren. */
if (c == '(')
closeparen = ')';
else if (c == '{')
closeparen = '}';
else
/* This is a single-letter variable reference. */
break;
for (count = 0; *p != '\0'; ++p)
{
if (*p == c)
++count;
else if (*p == closeparen && --count < 0)
{
++p;
break;
}
}
/* This is a variable reference, so skip it. */
p = skip_reference (p);
break;
case '|':

View File

@ -1726,14 +1726,13 @@ new_job (struct file *file)
*out++ = *in++; /* Copy OPENPAREN. */
outref = out;
/* IN now points past the opening paren or brace.
Count parens or braces until it is matched. */
/* IN now points past the opening paren or brace. Count parens
or braces until it is matched. We don't use skip_reference
since we want to handle internal backslash/newlines. */
count = 0;
while (*in != '\0')
{
if (*in == closeparen && --count < 0)
break;
else if (*in == '\\' && in[1] == '\n')
if (*in == '\\' && in[1] == '\n')
{
/* We have found a backslash-newline inside a
variable or function reference. Eat it and
@ -1744,11 +1743,11 @@ new_job (struct file *file)
quoted = !quoted;
if (quoted)
/* There were two or more backslashes, so this is
not really a continuation line. We don't collapse
the quoting backslashes here as is done in
collapse_continuations, because the line will
be collapsed again after expansion. */
/* There were an even number of backslashes, so this
is not really a continuation line. We don't
collapse the quoting backslashes here as is done in
collapse_continuations, because the line will be
collapsed again after expansion. */
*out++ = *in++;
else
{
@ -1764,14 +1763,14 @@ new_job (struct file *file)
/* Replace it all with a single space. */
*out++ = ' ';
}
continue;
}
else
{
if (*in == openparen)
++count;
if (*in == closeparen && --count < 0)
break;
if (*in == openparen)
++count;
*out++ = *in++;
}
*out++ = *in++;
}
}
}

View File

@ -601,6 +601,7 @@ char *xstrndup (const char *, size_t);
char *find_next_token (const char **, size_t *);
char *next_token (const char *);
char *end_of_token (const char *);
char *skip_reference (const char *);
void collapse_continuations (char *);
char *lindex (const char *, const char *, int);
int alpha_compare (const void *, const void *);

View File

@ -418,6 +418,50 @@ next_token (const char *s)
return (char *)s;
}
/* Step over one variable reference and return a pointer to whatever follows.

   P addresses the character immediately after a '$'.  The result is:
     - P itself, when P is already at the end of the string;
     - P+1, when *P is anything other than '(' or '{' (a one-character
       reference);
     - otherwise the character just past the close paren or brace that
       balances *P, where nested openers of the same kind are counted.
   If the string ends before the reference is balanced, the result points
   at the terminating NUL.  */
char *
skip_reference (const char *p)
{
  const char opener = *p;
  char closer;
  int depth = 1;

  switch (opener)
    {
    case '\0':
      return (char *) p;
    case '(':
      closer = ')';
      break;
    case '{':
      closer = '}';
      break;
    default:
      return (char *) (p + 1);
    }

  for (;;)
    {
      ++p;

      /* Characters that STOP_SET does not flag for MAP_NUL|MAP_VARSEP
         cannot end or nest the reference, so they are passed over.  */
      if (STOP_SET (*p, MAP_NUL|MAP_VARSEP))
        {
          if (*p == '\0')
            return (char *) p;

          if (*p == opener)
            ++depth;
          else if (*p == closer && --depth == 0)
            return (char *) (p + 1);
        }
    }
}
/* Find the next token in PTR; return the address of it, and store the length
of the token into *LENGTHPTR if LENGTHPTR is not nil. Set *PTR to the end
of the token, so this function can be called repeatedly in a loop. */

View File

@ -1669,33 +1669,10 @@ conditional_line (char *line, size_t len, const floc *flocp)
s1 = ++line;
/* Find the end of the first string. */
if (termin == ',')
{
int count = 0;
char *delim = xmalloc (strlen (line));
while (*line != '\0')
{
if (*line == '$')
{
++line;
if (*line == '(')
delim[count++] = ')';
else if (*line == '{')
delim[count++] = '}';
}
else if (count == 0)
{
if (*line == ',')
break;
}
else if (*line == delim[count-1])
--count;
++line;
}
free (delim);
}
else
while (*line != '\0' && *line != termin)
while (*line != '\0' && *line != termin)
if (*line == '$')
line = skip_reference (line+1);
else
++line;
if (*line == '\0')
@ -1703,7 +1680,7 @@ conditional_line (char *line, size_t len, const floc *flocp)
if (termin == ',')
{
/* Strip blanks after the first string. */
/* Strip blanks before the comma. */
char *p = line++;
while (ISBLANK (p[-1]))
--p;
@ -2355,35 +2332,7 @@ find_map_unquote (char *string, int stopmap)
/* If we stopped due to a variable reference, skip over its contents. */
if (*p == '$')
{
char openparen = p[1];
/* Check if '$' is the last character in the string. */
if (openparen == '\0')
break;
p += 2;
/* Skip the contents of a non-quoted, multi-char variable ref. */
if (openparen == '(' || openparen == '{')
{
unsigned int pcount = 1;
char closeparen = (openparen == '(' ? ')' : '}');
while (*p)
{
if (*p == openparen)
++pcount;
else if (*p == closeparen)
if (--pcount == 0)
{
++p;
break;
}
++p;
}
}
/* Skipped the variable reference: look for STOPCHARS again. */
p = skip_reference (p+1);
continue;
}
@ -2851,12 +2800,10 @@ get_next_mword (char *buffer, char **startp, size_t *length)
adjust our assumptions then. */
wtype = w_static;
/* We already found the first value of "c", above. */
while (1)
{
char closeparen;
int count;
/* Each time through the loop, "c" has the current character
and "p" points to the next character. */
if (END_OF_TOKEN (c))
goto done_word;
@ -2883,28 +2830,9 @@ get_next_mword (char *buffer, char **startp, size_t *length)
if (c == '\0')
goto done_word;
/* This is a variable reference, so note that it's expandable.
Then read it to the matching close paren. */
/* This is a variable reference: note that it's expandable, then skip it. */
wtype = w_variable;
if (c == '(')
closeparen = ')';
else if (c == '{')
closeparen = '}';
else
/* This is a single-letter variable reference. */
break;
for (count=0; *p != '\0'; ++p)
{
if (*p == c)
++count;
else if (*p == closeparen && --count < 0)
{
++p;
break;
}
}
p = skip_reference (p-1);
break;
case '?':

View File

@ -1732,7 +1732,7 @@ parse_variable_definition (const char *str, struct variable *var)
if (!end)
end = p - 1;
/* We need to distinguish :=, ::=, and :::=, and : outside of an
/* We need to distinguish :=, ::=, and :::=, versus : outside of an
assignment (which means this is not a variable definition). */
c = *p++;
if (c == '=')
@ -1789,41 +1789,7 @@ parse_variable_definition (const char *str, struct variable *var)
return NULL;
if (c == '$')
{
/* Skip any variable reference, to ensure we don't treat chars
inside the reference as assignment operators. */
char closeparen;
unsigned int count;
c = *p++;
switch (c)
{
case '(':
closeparen = ')';
break;
case '{':
closeparen = '}';
break;
case '\0':
return NULL;
default:
/* '$$' or '$X': skip it. */
continue;
}
/* P now points past the opening paren or brace. Count parens or
braces until we find the closing paren/brace. */
for (count = 1; *p != '\0'; ++p)
{
if (*p == closeparen && --count == 0)
{
++p;
break;
}
if (*p == c)
++count;
}
}
p = skip_reference (p);
}
/* We found a valid variable assignment: END points to the char after the