From 828906b6dcc85e2c7fa663435571ac73a97b52cc Mon Sep 17 00:00:00 2001
From: Paul Smith <psmith@gnu.org>
Date: Wed, 10 Jan 2024 00:01:33 -0500
Subject: [PATCH] Create a common method for skipping variable references

* README.git: Add some notes about using ASAN.
* src/makeint.h: Declare skip_references().
* src/misc.c (skip_reference): A new function that will skip over a
variable reference, counting matching open paren/brace characters.
* src/implicit.c (get_next_word): Replace code with skip_reference().
* src/read.c (conditional_line): Ditto.
(find_map_unquote): Ditto.
(get_next_mword): Ditto.
(parse_variable_definition): Ditto.
* src/function.c (handle_function): Make clear that the passed in
pointers are not modified if the function returns false.
* src/expand.c (expand_string_buf): Don't create local variables to
call handle_function() since it doesn't modify its arguments.
* src/job.c (new_job): Small simplifications.
---
 README.git     |  7 ++++
 src/expand.c   | 27 ++++++---------
 src/function.c | 11 +++---
 src/implicit.c | 33 +++---------------
 src/job.c      | 31 ++++++++---------
 src/makeint.h  |  1 +
 src/misc.c     | 44 ++++++++++++++++++++++++
 src/read.c     | 92 ++++++--------------------------------------------
 src/variable.c | 38 ++-------------------
 9 files changed, 99 insertions(+), 185 deletions(-)

diff --git a/README.git b/README.git
index dc49e9d6..045f6414 100644
--- a/README.git
+++ b/README.git
@@ -209,6 +209,13 @@ work on non-GNU systems (Windows, MacOS, etc.)
 
     make clean
     make -j8 CFLAGS='-ggdb3 -fsanitize=address' LDFLAGS='-ggdb3 -fsanitize=address'
+
+  Then to check for corruption only but not memory leaks run:
+
+    ASAN_OPTIONS='detect_stack_after_use_return=true:detect_leaks=false' make check
+
+  To check for leaks too run:
+
     make check
 
   Note that ASAN is reporting many more errors than valgrind.  I don't know
diff --git a/src/expand.c b/src/expand.c
index e840bc48..66c55e95 100644
--- a/src/expand.c
+++ b/src/expand.c
@@ -382,47 +382,40 @@ expand_string_buf (char *buf, const char *string, size_t length)
           {
             char openparen = *p;
             char closeparen = (openparen == '(') ? ')' : '}';
-            const char *begp;
             const char *beg = p + 1;
-            char *op;
             char *abeg = NULL;
             const char *end, *colon;
 
-            op = o;
-            begp = p;
-            if (handle_function (&op, &begp))
-              {
-                o = op;
-                p = begp;
-                break;
-              }
+            if (handle_function (&o, &p))
+              break;
 
             /* Is there a variable reference inside the parens or braces?
                If so, expand it before expanding the entire reference.  */
 
             end = strchr (beg, closeparen);
-            if (end == 0)
+            if (end == NULL)
               /* Unterminated variable reference.  */
               O (fatal, *expanding_var, _("unterminated variable reference"));
             p1 = lindex (beg, end, '$');
-            if (p1 != 0)
+            if (p1 != NULL)
               {
                 /* BEG now points past the opening paren or brace.
                    Count parens or braces until it is matched.  */
-                int count = 0;
+                int count = 1;
                 for (p = beg; *p != '\0'; ++p)
                   {
                     if (*p == openparen)
                       ++count;
-                    else if (*p == closeparen && --count < 0)
+                    else if (*p == closeparen && --count == 0)
                       break;
                   }
-                /* If COUNT is >= 0, there were unmatched opening parens
+                /* If COUNT is > 0, there were unmatched opening parens
                    or braces, so we go to the simple case of a variable name
                    such as '$($(a)'.  */
-                if (count < 0)
+                if (count == 0)
                   {
-                    abeg = expand_argument (beg, p); /* Expand the name.  */
+                    /* Expand the name.  */
+                    abeg = expand_argument (beg, p);
                     beg = abeg;
                     end = strchr (beg, '\0');
                   }
diff --git a/src/function.c b/src/function.c
index 5e7285a5..ab89d9de 100644
--- a/src/function.c
+++ b/src/function.c
@@ -2484,7 +2484,8 @@ expand_builtin_function (char *o, unsigned int argc, char **argv,
 /* Check for a function invocation in *STRINGP.  *STRINGP points at the
    opening ( or { and is not null-terminated.  If a function invocation
    is found, expand it into the buffer at *OP, updating *OP, incrementing
-   *STRINGP past the reference and returning nonzero.  If not, return zero.  */
+   *STRINGP past the reference, and return nonzero.
+   If no function is found, return zero and don't change *OP or *STRINGP.  */
 
 int
 handle_function (char **op, const char **stringp)
@@ -2512,10 +2513,10 @@ handle_function (char **op, const char **stringp)
   beg += entry_p->len;
   NEXT_TOKEN (beg);
 
-  /* Find the end of the function invocation, counting nested use of
-     whichever kind of parens we use.  Since we're looking, count commas
-     to get a rough estimate of how many arguments we might have.  The
-     count might be high, but it'll never be low.  */
+  /* Find the end of the function invocation, counting nested use of whichever
+     kind of parens we use.  Don't use skip_reference so we can count commas
+     to get a rough estimate of how many arguments we might have.  The count
+     might be high, but it'll never be low.  */
 
   for (nargs=1, end=beg; *end != '\0'; ++end)
     if (!STOP_SET (*end, MAP_VARSEP|MAP_COMMA))
diff --git a/src/implicit.c b/src/implicit.c
index 6806e5cd..134e3fef 100644
--- a/src/implicit.c
+++ b/src/implicit.c
@@ -87,12 +87,10 @@ get_next_word (const char *buffer, size_t *length)
     return 0;
 
 
-  /* We already found the first value of "c", above.  */
   while (1)
     {
-      char closeparen;
-      int count;
-
+      /* Each time through the loop, "c" has the current char
+         and "p" points to the next char.  */
       switch (c)
         {
         case '\0':
@@ -101,31 +99,8 @@ get_next_word (const char *buffer, size_t *length)
           goto done_word;
 
         case '$':
-          c = *(p++);
-          if (c == '$')
-            break;
-
-          /* This is a variable reference, so read it to the matching
-             close paren.  */
-
-          if (c == '(')
-            closeparen = ')';
-          else if (c == '{')
-            closeparen = '}';
-          else
-            /* This is a single-letter variable reference.  */
-            break;
-
-          for (count = 0; *p != '\0'; ++p)
-            {
-              if (*p == c)
-                ++count;
-              else if (*p == closeparen && --count < 0)
-                {
-                  ++p;
-                  break;
-                }
-            }
+          /* This is a variable reference, so skip it.  */
+          p = skip_reference (p);
           break;
 
         case '|':
diff --git a/src/job.c b/src/job.c
index dfd844dc..45e45c5c 100644
--- a/src/job.c
+++ b/src/job.c
@@ -1726,14 +1726,13 @@ new_job (struct file *file)
 
               *out++ = *in++;   /* Copy OPENPAREN.  */
               outref = out;
-              /* IN now points past the opening paren or brace.
-                 Count parens or braces until it is matched.  */
+              /* IN now points past the opening paren or brace.  Count parens
+                 or braces until it is matched.  We don't use skip_reference
+                 since we want to handle internal backslash/newlines.  */
               count = 0;
               while (*in != '\0')
                 {
-                  if (*in == closeparen && --count < 0)
-                    break;
-                  else if (*in == '\\' && in[1] == '\n')
+                  if (*in == '\\' && in[1] == '\n')
                     {
                       /* We have found a backslash-newline inside a
                          variable or function reference.  Eat it and
@@ -1744,11 +1743,11 @@ new_job (struct file *file)
                         quoted = !quoted;
 
                       if (quoted)
-                        /* There were two or more backslashes, so this is
-                           not really a continuation line.  We don't collapse
-                           the quoting backslashes here as is done in
-                           collapse_continuations, because the line will
-                           be collapsed again after expansion.  */
+                        /* There were an even number of backslashes, so this
+                           is not really a continuation line.  We don't
+                           collapse the quoting backslashes here as is done in
+                           collapse_continuations, because the line will be
+                           collapsed again after expansion.  */
                         *out++ = *in++;
                       else
                         {
@@ -1764,14 +1763,14 @@ new_job (struct file *file)
                           /* Replace it all with a single space.  */
                           *out++ = ' ';
                         }
+                      continue;
                     }
-                  else
-                    {
-                      if (*in == openparen)
-                        ++count;
+                  if (*in == closeparen && --count < 0)
+                    break;
+                  if (*in == openparen)
+                    ++count;
 
-                      *out++ = *in++;
-                    }
+                  *out++ = *in++;
                 }
             }
         }
diff --git a/src/makeint.h b/src/makeint.h
index 00289b9c..869cf461 100644
--- a/src/makeint.h
+++ b/src/makeint.h
@@ -601,6 +601,7 @@ char *xstrndup (const char *, size_t);
 char *find_next_token (const char **, size_t *);
 char *next_token (const char *);
 char *end_of_token (const char *);
+char *skip_reference (const char *);
 void collapse_continuations (char *);
 char *lindex (const char *, const char *, int);
 int alpha_compare (const void *, const void *);
diff --git a/src/misc.c b/src/misc.c
index 64309900..a53301df 100644
--- a/src/misc.c
+++ b/src/misc.c
@@ -418,6 +418,50 @@ next_token (const char *s)
   return (char *)s;
 }
 
+/* This function returns P if P points to EOS, or P+1 if P is NOT an open
+   paren or brace, or a pointer to the character after the matching close
+   paren or brace, skipping matched internal parens or braces.
+
+   It is typically called when we have seen a '$' in a string and we want to
+   treat it as a variable reference and find the end of it: in that case P
+   should point to the character after the '$'.  */
+
+char *
+skip_reference (const char *p)
+{
+  char openparen = *p;
+  char closeparen;
+  int count = 1;
+
+  if (openparen == '\0')
+    return (char*)p;
+
+  if (openparen == '(')
+    closeparen = ')';
+  else if (openparen == '{')
+    closeparen = '}';
+  else
+    return (char*)(p+1);
+
+  while (1)
+    {
+      ++p;
+      if (!STOP_SET (*p, MAP_NUL|MAP_VARSEP))
+        continue;
+      if (*p == '\0')
+        break;
+      if (*p == openparen)
+        ++count;
+      else if (*p == closeparen && --count == 0)
+        {
+          ++p;
+          break;
+        }
+    }
+
+  return (char*)p;
+}
+
 /* Find the next token in PTR; return the address of it, and store the length
    of the token into *LENGTHPTR if LENGTHPTR is not nil.  Set *PTR to the end
    of the token, so this function can be called repeatedly in a loop.  */
diff --git a/src/read.c b/src/read.c
index e97d3061..c5a635f9 100644
--- a/src/read.c
+++ b/src/read.c
@@ -1669,33 +1669,10 @@ conditional_line (char *line, size_t len, const floc *flocp)
 
       s1 = ++line;
       /* Find the end of the first string.  */
-      if (termin == ',')
-        {
-          int count = 0;
-          char *delim = xmalloc (strlen (line));
-          while (*line != '\0')
-            {
-              if (*line == '$')
-                {
-                  ++line;
-                  if (*line == '(')
-                    delim[count++] = ')';
-                  else if (*line == '{')
-                    delim[count++] = '}';
-                }
-              else if (count == 0)
-                {
-                  if (*line == ',')
-                    break;
-                }
-              else if (*line == delim[count-1])
-                --count;
-              ++line;
-            }
-          free (delim);
-        }
-      else
-        while (*line != '\0' && *line != termin)
+      while (*line != '\0' && *line != termin)
+        if (*line == '$')
+          line = skip_reference (line+1);
+        else
           ++line;
 
       if (*line == '\0')
@@ -1703,7 +1680,7 @@ conditional_line (char *line, size_t len, const floc *flocp)
 
       if (termin == ',')
         {
-          /* Strip blanks after the first string.  */
+          /* Strip blanks before the comma.  */
           char *p = line++;
           while (ISBLANK (p[-1]))
             --p;
@@ -2355,35 +2332,7 @@ find_map_unquote (char *string, int stopmap)
       /* If we stopped due to a variable reference, skip over its contents.  */
       if (*p == '$')
         {
-          char openparen = p[1];
-
-          /* Check if '$' is the last character in the string.  */
-          if (openparen == '\0')
-            break;
-
-          p += 2;
-
-          /* Skip the contents of a non-quoted, multi-char variable ref.  */
-          if (openparen == '(' || openparen == '{')
-            {
-              unsigned int pcount = 1;
-              char closeparen = (openparen == '(' ? ')' : '}');
-
-              while (*p)
-                {
-                  if (*p == openparen)
-                    ++pcount;
-                  else if (*p == closeparen)
-                    if (--pcount == 0)
-                      {
-                        ++p;
-                        break;
-                      }
-                  ++p;
-                }
-            }
-
-          /* Skipped the variable reference: look for STOPCHARS again.  */
+          p = skip_reference (p+1);
           continue;
         }
 
@@ -2851,12 +2800,10 @@ get_next_mword (char *buffer, char **startp, size_t *length)
      adjust our assumptions then.  */
   wtype = w_static;
 
-  /* We already found the first value of "c", above.  */
   while (1)
     {
-      char closeparen;
-      int count;
-
+      /* Each time through the loop, "c" has the current character
+         and "p" points to the next character.  */
       if (END_OF_TOKEN (c))
         goto done_word;
 
@@ -2883,28 +2830,9 @@ get_next_mword (char *buffer, char **startp, size_t *length)
           if (c == '\0')
             goto done_word;
 
-          /* This is a variable reference, so note that it's expandable.
-             Then read it to the matching close paren.  */
+          /* This is a variable reference: note that then skip it.  */
           wtype = w_variable;
-
-          if (c == '(')
-            closeparen = ')';
-          else if (c == '{')
-            closeparen = '}';
-          else
-            /* This is a single-letter variable reference.  */
-            break;
-
-          for (count=0; *p != '\0'; ++p)
-            {
-              if (*p == c)
-                ++count;
-              else if (*p == closeparen && --count < 0)
-                {
-                  ++p;
-                  break;
-                }
-            }
+          p = skip_reference (p-1);
           break;
 
         case '?':
diff --git a/src/variable.c b/src/variable.c
index 9d4aec42..8b63df2d 100644
--- a/src/variable.c
+++ b/src/variable.c
@@ -1732,7 +1732,7 @@ parse_variable_definition (const char *str, struct variable *var)
           if (!end)
             end = p - 1;
 
-          /* We need to distinguish :=, ::=, and :::=, and : outside of an
+          /* We need to distinguish :=, ::=, and :::=, versus : outside of an
              assignment (which means this is not a variable definition).  */
           c = *p++;
           if (c == '=')
@@ -1789,41 +1789,7 @@ parse_variable_definition (const char *str, struct variable *var)
         return NULL;
 
       if (c == '$')
-        {
-          /* Skip any variable reference, to ensure we don't treat chars
-             inside the reference as assignment operators.  */
-          char closeparen;
-          unsigned int count;
-
-          c = *p++;
-          switch (c)
-            {
-            case '(':
-              closeparen = ')';
-              break;
-            case '{':
-              closeparen = '}';
-              break;
-            case '\0':
-              return NULL;
-            default:
-              /* '$$' or '$X': skip it.  */
-              continue;
-            }
-
-          /* P now points past the opening paren or brace.  Count parens or
-             braces until we find the closing paren/brace.  */
-          for (count = 1; *p != '\0'; ++p)
-            {
-              if (*p == closeparen && --count == 0)
-                {
-                  ++p;
-                  break;
-                }
-              if (*p == c)
-                ++count;
-            }
-        }
+        p = skip_reference (p);
     }
 
   /* We found a valid variable assignment: END points to the char after the