[SV 59093] Rewrite filter/filter-out to avoid large stack usage

* src/function.c (func_filter_filterout): Allocate arrays to hold
pattern and word information rather than creating linked lists on
the stack.
* tests/scripts/functions/filter-out: Test large filters.
This commit is contained in:
Paul Smith 2020-11-12 17:00:39 -05:00
parent f4f353bb54
commit e49e11e069
2 changed files with 98 additions and 80 deletions

View File

@ -910,7 +910,6 @@ func_foreach (char *o, char **argv, const char *funcname UNUSED)
struct a_word struct a_word
{ {
struct a_word *next;
struct a_word *chain; struct a_word *chain;
char *str; char *str;
size_t length; size_t length;
@ -941,7 +940,6 @@ a_word_hash_cmp (const void *x, const void *y)
struct a_pattern struct a_pattern
{ {
struct a_pattern *next;
char *str; char *str;
char *percent; char *percent;
size_t length; size_t length;
@ -950,78 +948,84 @@ struct a_pattern
static char * static char *
func_filter_filterout (char *o, char **argv, const char *funcname) func_filter_filterout (char *o, char **argv, const char *funcname)
{ {
struct a_word *wordhead; struct a_word *words;
struct a_word **wordtail; struct a_word *word_end;
struct a_word *wp; struct a_word *wp;
struct a_pattern *pathead; struct a_pattern *patterns;
struct a_pattern **pattail; struct a_pattern *pat_end;
struct a_pattern *pp; struct a_pattern *pp;
size_t pat_count = 0, word_count = 0;
struct hash_table a_word_table; struct hash_table a_word_table;
int is_filter = funcname[CSTRLEN ("filter")] == '\0'; int is_filter = funcname[CSTRLEN ("filter")] == '\0';
const char *pat_iterator = argv[0]; const char *cp;
const char *word_iterator = argv[1];
int literals = 0; int literals = 0;
int words = 0;
int hashing = 0; int hashing = 0;
char *p; char *p;
size_t len; size_t len;
int doneany = 0;
/* Chop ARGV[0] up into patterns to match against the words. /* Find the number of words and get memory for them. */
We don't need to preserve it because our caller frees all the cp = argv[1];
argument memory anyway. */ while ((p = find_next_token (&cp, NULL)) != 0)
++word_count;
pattail = &pathead; if (!word_count)
while ((p = find_next_token (&pat_iterator, &len)) != 0) return o;
words = xcalloc (word_count * sizeof (struct a_word));
word_end = words + word_count;
/* Find the number of patterns and get memory for them. */
cp = argv[0];
while ((p = find_next_token (&cp, NULL)) != 0)
++pat_count;
patterns = xcalloc (pat_count * sizeof (struct a_pattern));
pat_end = patterns + pat_count;
/* Chop argv[0] up into patterns to match against the words. */
cp = argv[0];
pp = patterns;
while ((p = find_next_token (&cp, &len)) != 0)
{ {
struct a_pattern *pat = alloca (sizeof (struct a_pattern)); if (*cp != '\0')
++cp;
*pattail = pat;
pattail = &pat->next;
if (*pat_iterator != '\0')
++pat_iterator;
pat->str = p;
p[len] = '\0'; p[len] = '\0';
pat->percent = find_percent (p); pp->str = p;
if (pat->percent == 0) pp->percent = find_percent (p);
if (pp->percent == 0)
literals++; literals++;
/* find_percent() might shorten the string so LEN is wrong. */ /* find_percent() might shorten the string so LEN is wrong. */
pat->length = strlen (pat->str); pp->length = strlen (pp->str);
++pp;
} }
*pattail = 0;
/* Chop ARGV[1] up into words to match against the patterns. */ /* Chop ARGV[1] up into words to match against the patterns. */
wordtail = &wordhead; cp = argv[1];
while ((p = find_next_token (&word_iterator, &len)) != 0) wp = words;
while ((p = find_next_token (&cp, &len)) != 0)
{ {
struct a_word *word = alloca (sizeof (struct a_word)); if (*cp != '\0')
++cp;
*wordtail = word;
wordtail = &word->next;
if (*word_iterator != '\0')
++word_iterator;
p[len] = '\0'; p[len] = '\0';
word->str = p; wp->str = p;
word->length = len; wp->length = len;
word->matched = 0; ++wp;
word->chain = 0;
words++;
} }
*wordtail = 0;
/* Only use a hash table if arg list lengths justifies the cost. */ /* Only use a hash table if arg list lengths justifies the cost. */
hashing = (literals >= 2 && (literals * words) >= 10); hashing = (literals > 1 && (literals * word_count) >= 10);
if (hashing) if (hashing)
{ {
hash_init (&a_word_table, words, a_word_hash_1, a_word_hash_2, hash_init (&a_word_table, word_count, a_word_hash_1, a_word_hash_2,
a_word_hash_cmp); a_word_hash_cmp);
for (wp = wordhead; wp != 0; wp = wp->next) for (wp = words; wp < word_end; ++wp)
{ {
struct a_word *owp = hash_insert (&a_word_table, wp); struct a_word *owp = hash_insert (&a_word_table, wp);
if (owp) if (owp)
@ -1029,15 +1033,11 @@ func_filter_filterout (char *o, char **argv, const char *funcname)
} }
} }
if (words)
{
int doneany = 0;
/* Run each pattern through the words, killing words. */ /* Run each pattern through the words, killing words. */
for (pp = pathead; pp != 0; pp = pp->next) for (pp = patterns; pp < pat_end; ++pp)
{ {
if (pp->percent) if (pp->percent)
for (wp = wordhead; wp != 0; wp = wp->next) for (wp = words; wp < word_end; ++wp)
wp->matched |= pattern_matches (pp->str, pp->percent, wp->str); wp->matched |= pattern_matches (pp->str, pp->percent, wp->str);
else if (hashing) else if (hashing)
{ {
@ -1052,13 +1052,13 @@ func_filter_filterout (char *o, char **argv, const char *funcname)
} }
} }
else else
for (wp = wordhead; wp != 0; wp = wp->next) for (wp = words; wp < word_end; ++wp)
wp->matched |= (wp->length == pp->length wp->matched |= (wp->length == pp->length
&& strneq (pp->str, wp->str, wp->length)); && strneq (pp->str, wp->str, wp->length));
} }
/* Output the words that matched (or didn't, for filter-out). */ /* Output the words that matched (or didn't, for filter-out). */
for (wp = wordhead; wp != 0; wp = wp->next) for (wp = words; wp < word_end; ++wp)
if (is_filter ? wp->matched : !wp->matched) if (is_filter ? wp->matched : !wp->matched)
{ {
o = variable_buffer_output (o, wp->str, strlen (wp->str)); o = variable_buffer_output (o, wp->str, strlen (wp->str));
@ -1069,11 +1069,13 @@ func_filter_filterout (char *o, char **argv, const char *funcname)
if (doneany) if (doneany)
/* Kill the last space. */ /* Kill the last space. */
--o; --o;
}
if (hashing) if (hashing)
hash_free (&a_word_table, 0); hash_free (&a_word_table, 0);
free (patterns);
free (words);
return o; return o;
} }

View File

@ -27,6 +27,22 @@ all: ; @echo '$(files1) $(files2)'
!, !,
'', "foo.elc foo.elc\n"); '', "foo.elc foo.elc\n");
# Force use of hash (see function.c:func_filter_filterout for params)
# The hash table is only used when there are at least two literal (no '%')
# patterns and literals * words >= 10, so filter a 100-word list with two
# literal patterns.

# Ten distinct words: foo.1 through foo.10.
my $base = 'foo.1 foo.2 foo.3 foo.4 foo.5 foo.6 foo.7 foo.8 foo.9 foo.10';
# The same ten words repeated ten times (100 words in total).
my $base10 = join(' ', ($base) x 10);
# Expected result for the %.3 filter: one foo.3 per repetition.
my $out3 = join(' ', ('foo.3') x 10);
# Expected result for the "%.4 foo.5 foo.6" filter: three words per
# repetition, in their original order.
my $out456 = join(' ', ('foo.4 foo.5 foo.6') x 10);
# files1 uses a single wildcard pattern; files2 mixes one wildcard pattern
# with the two literal patterns foo.5 and foo.6.
run_make_test("words := $base10" . q!
files1 := $(filter %.3, $(words))
files2 := $(filter %.4 foo.5 foo.6, $(words))
all: ; @echo '$(files1) $(files2)'
!,
'', "$out3 $out456\n");
# Escaped patterns # Escaped patterns
run_make_test(q!all:;@echo '$(filter foo\%bar,foo%bar fooXbar)'!, run_make_test(q!all:;@echo '$(filter foo\%bar,foo%bar fooXbar)'!,
'', "foo%bar\n"); '', "foo%bar\n");