Create a common method for skipping variable references

* README.git: Add some notes about using ASAN. * src/makeint.h: Declare skip_reference(). * src/misc.c (skip_reference): A new function that will skip over a variable reference, counting matching open paren/brace characters. * src/implicit.c (get_next_word): Replace code with skip_reference(). * src/read.c (conditional_line): Ditto. (find_map_unquote): Ditto. (get_next_mword): Ditto. * src/variable.c (parse_variable_definition): Ditto. * src/function.c (handle_function): Make clear that the passed in pointers are not modified if the function returns false. * src/expand.c (expand_string_buf): Don't create local variables to call handle_function() since it doesn't modify its arguments. * src/job.c (new_job): Small simplifications.
2025-04-02 07:30:38 +08:00 · 2024-01-10 00:01:33 -05:00 · 2024-01-10 00:01:33 -05:00 · 828906b6dc
commit 828906b6dc
parent b936970956
9 changed files with 99 additions and 185 deletions
--- a/README.git
+++ b/README.git
@ -209,6 +209,13 @@ work on non-GNU systems (Windows, MacOS, etc.)
    make clean
    make -j8 CFLAGS='-ggdb3 -fsanitize=address' LDFLAGS='-ggdb3 -fsanitize=address'
  Then to check for corruption only but not memory leaks run:
    ASAN_OPTIONS='detect_stack_use_after_return=true:detect_leaks=false' make check
  To check for leaks too run:
    make check
  Note that ASAN is reporting many more errors than valgrind.  I don't know
--- a/src/expand.c
+++ b/src/expand.c
@ -382,47 +382,40 @@ expand_string_buf (char *buf, const char *string, size_t length)
          {
            char openparen = *p;
            char closeparen = (openparen == '(') ? ')' : '}';
            const char *begp;
            const char *beg = p + 1;
            char *op;
            char *abeg = NULL;
            const char *end, *colon;
-            op = o;
+            if (handle_function (&o, &p))
-            begp = p;
+              break;
            if (handle_function (&op, &begp))
              {
                o = op;
                p = begp;
                break;
              }
            /* Is there a variable reference inside the parens or braces?
               If so, expand it before expanding the entire reference.  */
            end = strchr (beg, closeparen);
-            if (end == 0)
+            if (end == NULL)
              /* Unterminated variable reference.  */
              O (fatal, *expanding_var, _("unterminated variable reference"));
            p1 = lindex (beg, end, '$');
-            if (p1 != 0)
+            if (p1 != NULL)
              {
                /* BEG now points past the opening paren or brace.
                   Count parens or braces until it is matched.  */
-                int count = 0;
+                int count = 1;
                for (p = beg; *p != '\0'; ++p)
                  {
                    if (*p == openparen)
                      ++count;
-                    else if (*p == closeparen && --count < 0)
+                    else if (*p == closeparen && --count == 0)
                      break;
                  }
-                /* If COUNT is >= 0, there were unmatched opening parens
+                /* If COUNT is > 0, there were unmatched opening parens
                   or braces, so we go to the simple case of a variable name
                   such as '$($(a)'.  */
-                if (count < 0)
+                if (count == 0)
                  {
-                    abeg = expand_argument (beg, p); /* Expand the name.  */
+                    /* Expand the name.  */
                    abeg = expand_argument (beg, p);
                    beg = abeg;
                    end = strchr (beg, '\0');
                  }
--- a/src/function.c
+++ b/src/function.c
@ -2484,7 +2484,8 @@ expand_builtin_function (char *o, unsigned int argc, char **argv,
 /* Check for a function invocation in *STRINGP.  *STRINGP points at the
   opening ( or { and is not null-terminated.  If a function invocation
   is found, expand it into the buffer at *OP, updating *OP, incrementing
-   *STRINGP past the reference and returning nonzero.  If not, return zero.  */
+   *STRINGP past the reference, and return nonzero.
   If no function is found, return zero and don't change *OP or *STRINGP.  */
 int
 handle_function (char **op, const char **stringp)
@ -2512,10 +2513,10 @@ handle_function (char **op, const char **stringp)
  beg += entry_p->len;
  NEXT_TOKEN (beg);
-  /* Find the end of the function invocation, counting nested use of
+  /* Find the end of the function invocation, counting nested use of whichever
-     whichever kind of parens we use.  Since we're looking, count commas
+     kind of parens we use.  Don't use skip_reference so we can count commas
-     to get a rough estimate of how many arguments we might have.  The
+     to get a rough estimate of how many arguments we might have.  The count
-     count might be high, but it'll never be low.  */
+     might be high, but it'll never be low.  */
  for (nargs=1, end=beg; *end != '\0'; ++end)
    if (!STOP_SET (*end, MAP_VARSEP|MAP_COMMA))
--- a/src/implicit.c
+++ b/src/implicit.c
@ -87,12 +87,10 @@ get_next_word (const char *buffer, size_t *length)
    return 0;
  /* We already found the first value of "c", above.  */
  while (1)
    {
-      char closeparen;
+      /* Each time through the loop, "c" has the current char
-      int count;
+         and "p" points to the next char.  */
      switch (c)
        {
        case '\0':
@ -101,31 +99,8 @@ get_next_word (const char *buffer, size_t *length)
          goto done_word;
        case '$':
-          c = *(p++);
+          /* This is a variable reference, so skip it.  */
-          if (c == '$')
+          p = skip_reference (p);
            break;
          /* This is a variable reference, so read it to the matching
             close paren.  */
          if (c == '(')
            closeparen = ')';
          else if (c == '{')
            closeparen = '}';
          else
            /* This is a single-letter variable reference.  */
            break;
          for (count = 0; *p != '\0'; ++p)
            {
              if (*p == c)
                ++count;
              else if (*p == closeparen && --count < 0)
                {
                  ++p;
                  break;
                }
            }
          break;
        case '|':
--- a/src/job.c
+++ b/src/job.c
@ -1726,14 +1726,13 @@ new_job (struct file *file)
              *out++ = *in++;   /* Copy OPENPAREN.  */
              outref = out;
-              /* IN now points past the opening paren or brace.
+              /* IN now points past the opening paren or brace.  Count parens
-                 Count parens or braces until it is matched.  */
+                 or braces until it is matched.  We don't use skip_reference
                 since we want to handle internal backslash/newlines.  */
              count = 0;
              while (*in != '\0')
                {
-                  if (*in == closeparen && --count < 0)
+                  if (*in == '\\' && in[1] == '\n')
                    break;
                  else if (*in == '\\' && in[1] == '\n')
                    {
                      /* We have found a backslash-newline inside a
                         variable or function reference.  Eat it and
@ -1744,11 +1743,11 @@ new_job (struct file *file)
                        quoted = !quoted;
                      if (quoted)
-                        /* There were two or more backslashes, so this is
+                        /* There were an even number of backslashes, so this
-                           not really a continuation line.  We don't collapse
+                           is not really a continuation line.  We don't
-                           the quoting backslashes here as is done in
+                           collapse the quoting backslashes here as is done in
-                           collapse_continuations, because the line will
+                           collapse_continuations, because the line will be
-                           be collapsed again after expansion.  */
+                           collapsed again after expansion.  */
                        *out++ = *in++;
                      else
                        {
@ -1764,14 +1763,14 @@ new_job (struct file *file)
                          /* Replace it all with a single space.  */
                          *out++ = ' ';
                        }
                      continue;
                    }
-                  else
+                  if (*in == closeparen && --count < 0)
-                    {
+                    break;
-                      if (*in == openparen)
+                  if (*in == openparen)
-                        ++count;
+                    ++count;
-                      *out++ = *in++;
+                  *out++ = *in++;
                    }
                }
            }
        }
--- a/src/makeint.h
+++ b/src/makeint.h
@ -601,6 +601,7 @@ char *xstrndup (const char *, size_t);
 char *find_next_token (const char **, size_t *);
 char *next_token (const char *);
 char *end_of_token (const char *);
 char *skip_reference (const char *);
 void collapse_continuations (char *);
 char *lindex (const char *, const char *, int);
 int alpha_compare (const void *, const void *);
--- a/src/misc.c
+++ b/src/misc.c
@ -418,6 +418,50 @@ next_token (const char *s)
  return (char *)s;
 }
 /* Skip over a variable reference whose first character (the one following
   the '$') is at P, and return a pointer to the first character after it.

   The result is:
     - P itself, if P points to EOS (there is nothing to skip);
     - P+1, if *P is NOT an open paren or brace (a single-character
       reference such as the "X" in "$X" or the second '$' in "$$");
     - a pointer to the character after the matching close paren or brace,
       skipping matched internal parens or braces;
     - a pointer to the terminating EOS, if the reference is unterminated
       (the string ends before the matching close character is found).

   Only the kind of bracket that opened the reference is counted: inside
   "(...)" braces are ignored, and inside "{...}" parens are ignored.

   It is typically called when we have seen a '$' in a string and we want to
   treat it as a variable reference and find the end of it: in that case P
   should point to the character after the '$'.

   The argument is const but the returned pointer is not: the const
   qualifier is cast away on return.  */
 char *
 skip_reference (const char *p)
 {
  char openparen = *p;
  char closeparen;
  /* Nesting depth; starts at 1 to account for the opening char at *P.  */
  int count = 1;
  if (openparen == '\0')
    return (char*)p;
  if (openparen == '(')
    closeparen = ')';
  else if (openparen == '{')
    closeparen = '}';
  else
    /* Not a paren/brace reference: skip just this one character.  */
    return (char*)(p+1);
  while (1)
    {
      ++p;
      /* Fast path for characters that cannot affect the scan.
         NOTE(review): presumably MAP_NUL|MAP_VARSEP selects EOS plus the
         paren/brace characters tested below; the macro definitions are not
         visible here -- confirm.  */
      if (!STOP_SET (*p, MAP_NUL|MAP_VARSEP))
        continue;
      /* Unterminated reference: stop at EOS and return a pointer to it.  */
      if (*p == '\0')
        break;
      if (*p == openparen)
        ++count;
      else if (*p == closeparen && --count == 0)
        {
          /* Found the matching close: step past it.  */
          ++p;
          break;
        }
    }
  return (char*)p;
 }
 /* Find the next token in PTR; return the address of it, and store the length
   of the token into *LENGTHPTR if LENGTHPTR is not nil.  Set *PTR to the end
   of the token, so this function can be called repeatedly in a loop.  */
--- a/src/read.c
+++ b/src/read.c
@ -1669,33 +1669,10 @@ conditional_line (char *line, size_t len, const floc *flocp)
      s1 = ++line;
      /* Find the end of the first string.  */
-      if (termin == ',')
+      while (*line != '\0' && *line != termin)
-        {
+        if (*line == '$')
-          int count = 0;
+          line = skip_reference (line+1);
-          char *delim = xmalloc (strlen (line));
+        else
          while (*line != '\0')
            {
              if (*line == '$')
                {
                  ++line;
                  if (*line == '(')
                    delim[count++] = ')';
                  else if (*line == '{')
                    delim[count++] = '}';
                }
              else if (count == 0)
                {
                  if (*line == ',')
                    break;
                }
              else if (*line == delim[count-1])
                --count;
              ++line;
            }
          free (delim);
        }
      else
        while (*line != '\0' && *line != termin)
          ++line;
      if (*line == '\0')
@ -1703,7 +1680,7 @@ conditional_line (char *line, size_t len, const floc *flocp)
      if (termin == ',')
        {
-          /* Strip blanks after the first string.  */
+          /* Strip blanks before the comma.  */
          char *p = line++;
          while (ISBLANK (p[-1]))
            --p;
@ -2355,35 +2332,7 @@ find_map_unquote (char *string, int stopmap)
      /* If we stopped due to a variable reference, skip over its contents.  */
      if (*p == '$')
        {
-          char openparen = p[1];
+          p = skip_reference (p+1);
          /* Check if '$' is the last character in the string.  */
          if (openparen == '\0')
            break;
          p += 2;
          /* Skip the contents of a non-quoted, multi-char variable ref.  */
          if (openparen == '(' || openparen == '{')
            {
              unsigned int pcount = 1;
              char closeparen = (openparen == '(' ? ')' : '}');
              while (*p)
                {
                  if (*p == openparen)
                    ++pcount;
                  else if (*p == closeparen)
                    if (--pcount == 0)
                      {
                        ++p;
                        break;
                      }
                  ++p;
                }
            }
          /* Skipped the variable reference: look for STOPCHARS again.  */
          continue;
        }
@ -2851,12 +2800,10 @@ get_next_mword (char *buffer, char **startp, size_t *length)
     adjust our assumptions then.  */
  wtype = w_static;
  /* We already found the first value of "c", above.  */
  while (1)
    {
-      char closeparen;
+      /* Each time through the loop, "c" has the current character
-      int count;
+         and "p" points to the next character.  */
      if (END_OF_TOKEN (c))
        goto done_word;
@ -2883,28 +2830,9 @@ get_next_mword (char *buffer, char **startp, size_t *length)
          if (c == '\0')
            goto done_word;
-          /* This is a variable reference, so note that it's expandable.
+          /* This is a variable reference: note that then skip it.  */
             Then read it to the matching close paren.  */
          wtype = w_variable;
-
+          p = skip_reference (p-1);
          if (c == '(')
            closeparen = ')';
          else if (c == '{')
            closeparen = '}';
          else
            /* This is a single-letter variable reference.  */
            break;
          for (count=0; *p != '\0'; ++p)
            {
              if (*p == c)
                ++count;
              else if (*p == closeparen && --count < 0)
                {
                  ++p;
                  break;
                }
            }
          break;
        case '?':
--- a/src/variable.c
+++ b/src/variable.c
@ -1732,7 +1732,7 @@ parse_variable_definition (const char *str, struct variable *var)
          if (!end)
            end = p - 1;
-          /* We need to distinguish :=, ::=, and :::=, and : outside of an
+          /* We need to distinguish :=, ::=, and :::=, versus : outside of an
             assignment (which means this is not a variable definition).  */
          c = *p++;
          if (c == '=')
@ -1789,41 +1789,7 @@ parse_variable_definition (const char *str, struct variable *var)
        return NULL;
      if (c == '$')
-        {
+        p = skip_reference (p);
          /* Skip any variable reference, to ensure we don't treat chars
             inside the reference as assignment operators.  */
          char closeparen;
          unsigned int count;
          c = *p++;
          switch (c)
            {
            case '(':
              closeparen = ')';
              break;
            case '{':
              closeparen = '}';
              break;
            case '\0':
              return NULL;
            default:
              /* '$$' or '$X': skip it.  */
              continue;
            }
          /* P now points past the opening paren or brace.  Count parens or
             braces until we find the closing paren/brace.  */
          for (count = 1; *p != '\0'; ++p)
            {
              if (*p == closeparen && --count == 0)
                {
                  ++p;
                  break;
                }
              if (*p == c)
                ++count;
            }
        }
    }
  /* We found a valid variable assignment: END points to the char after the