Remove filter profile info (#1481)

This commit is contained in:
Josipmrden 2023-12-03 21:23:52 +01:00 committed by GitHub
parent 3ccd78ac71
commit d58a464141
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 116 additions and 19 deletions

View File

@ -106,6 +106,11 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
prev_ops_.pop_back();
ExpressionRemovalResult removal = RemoveExpressions(op.expression_, filter_exprs_for_removal_);
op.expression_ = removal.trimmed_expression;
if (op.expression_) {
Filters leftover_filters;
leftover_filters.CollectFilterExpression(op.expression_, *symbol_table_);
op.all_filters_ = std::move(leftover_filters);
}
// edge uniqueness filter comes always before filter in plan generation
LogicalOperator *input = op.input().get();

View File

@ -59,6 +59,12 @@ class JoinRewriter final : public HierarchicalLogicalOperatorVisitor {
ExpressionRemovalResult removal = RemoveExpressions(op.expression_, filter_exprs_for_removal_);
op.expression_ = removal.trimmed_expression;
if (op.expression_) {
Filters leftover_filters;
leftover_filters.CollectFilterExpression(op.expression_, *symbol_table_);
op.all_filters_ = std::move(leftover_filters);
}
if (!op.expression_ || utils::Contains(filter_exprs_for_removal_, op.expression_)) {
SetOnParent(op.input());
}

View File

@ -897,13 +897,14 @@ class RuleBasedPlanner {
std::unique_ptr<LogicalOperator> GenFilters(std::unique_ptr<LogicalOperator> last_op,
const std::unordered_set<Symbol> &bound_symbols, Filters &filters,
AstStorage &storage, const SymbolTable &symbol_table) {
auto all_filters = filters;
auto pattern_filters = ExtractPatternFilters(filters, symbol_table, storage, bound_symbols);
auto *filter_expr = impl::ExtractFilters(bound_symbols, filters, storage);
if (filter_expr) {
last_op =
std::make_unique<Filter>(std::move(last_op), std::move(pattern_filters), filter_expr, std::move(all_filters));
Filters operator_filters;
operator_filters.CollectFilterExpression(filter_expr, symbol_table);
last_op = std::make_unique<Filter>(std::move(last_op), std::move(pattern_filters), filter_expr,
std::move(operator_filters));
}
return last_op;
}

View File

@ -70,6 +70,7 @@ add_subdirectory(index_hints)
add_subdirectory(query_modules)
add_subdirectory(constraints)
add_subdirectory(inspect_query)
add_subdirectory(filter_info)
add_subdirectory(queries)
add_subdirectory(garbage_collection)

View File

@ -62,7 +62,7 @@ def test_analyze_graph_delete_statistics(delete_query, multi_db):
# After deleting statistics, id2 should be chosen because it has less vertices
expected_explain_after_delete_analysis = [
(f" * Produce {{n}}",),
(f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",),
(f" * Filter {{n.id1}}",),
(f" * ScanAllByLabelPropertyValue (n :Label {{id2}})",),
(f" * Once",),
]
@ -96,7 +96,7 @@ def test_analyze_full_graph(analyze_query, multi_db):
# Choose id2 before tha analysis because it has less vertices
expected_explain_before_analysis = [
(f" * Produce {{n}}",),
(f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",),
(f" * Filter {{n.id1}}",),
(f" * ScanAllByLabelPropertyValue (n :Label {{id2}})",),
(f" * Once",),
]
@ -117,7 +117,7 @@ def test_analyze_full_graph(analyze_query, multi_db):
# After analyzing graph, id1 index should be chosen because it has smaller average group size
expected_explain_after_analysis = [
(f" * Produce {{n}}",),
(f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",),
(f" * Filter {{n.id2}}",),
(f" * ScanAllByLabelPropertyValue (n :Label {{id1}})",),
(f" * Once",),
]
@ -152,7 +152,7 @@ def test_cardinality_different_avg_group_size_uniform_dist(multi_db):
assert analyze_graph_results[1 - first_index] == ("Label", "id2", 100, 20, 5, 0, 0)
expected_explain_after_analysis = [
(f" * Produce {{n}}",),
(f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",),
(f" * Filter {{n.id2}}",),
(f" * ScanAllByLabelPropertyValue (n :Label {{id1}})",),
(f" * Once",),
]
@ -183,7 +183,7 @@ def test_cardinality_same_avg_group_size_uniform_dist_diff_vertex_count(multi_db
assert analyze_graph_results[1 - first_index] == ("Label", "id2", 50, 50, 1, 0, 0)
expected_explain_after_analysis = [
(f" * Produce {{n}}",),
(f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",),
(f" * Filter {{n.id1}}",),
(f" * ScanAllByLabelPropertyValue (n :Label {{id2}})",),
(f" * Once",),
]
@ -214,7 +214,7 @@ def test_large_diff_in_num_vertices_v1(multi_db):
assert analyze_graph_results[1 - first_index] == ("Label", "id2", 99, 1, 99, 0, 0)
expected_explain_after_analysis = [
(f" * Produce {{n}}",),
(f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",),
(f" * Filter {{n.id1}}",),
(f" * ScanAllByLabelPropertyValue (n :Label {{id2}})",),
(f" * Once",),
]
@ -245,7 +245,7 @@ def test_large_diff_in_num_vertices_v2(multi_db):
assert analyze_graph_results[1 - first_index] == ("Label", "id2", 1000, 1000, 1, 0, 0)
expected_explain_after_analysis = [
(f" * Produce {{n}}",),
(f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",),
(f" * Filter {{n.id2}}",),
(f" * ScanAllByLabelPropertyValue (n :Label {{id1}})",),
(f" * Once",),
]
@ -286,7 +286,7 @@ def test_same_avg_group_size_diff_distribution(multi_db):
assert analyze_graph_results[1 - first_index] == ("Label", "id2", 100, 5, 20, 0, 0)
expected_explain_after_analysis = [
(f" * Produce {{n}}",),
(f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",),
(f" * Filter {{n.id1}}",),
(f" * ScanAllByLabelPropertyValue (n :Label {{id2}})",),
(f" * Once",),
]

View File

@ -0,0 +1,6 @@
function(copy_filter_info_e2e_python_files FILE_NAME)
copy_e2e_python_files(filter_info ${FILE_NAME})
endfunction()
copy_filter_info_e2e_python_files(common.py)
copy_filter_info_e2e_python_files(filter_info.py)

View File

@ -0,0 +1,23 @@
# Copyright 2023 Memgraph Ltd.
#
# Use of this software is governed by the Business Source License
# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
# License, and you may not use this file except in compliance with the Business Source License.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0, included in the file
# licenses/APL.txt.
import pytest
from gqlalchemy import Memgraph
@pytest.fixture
def memgraph(**kwargs) -> Memgraph:
memgraph = Memgraph()
yield memgraph
memgraph.drop_database()
memgraph.drop_indexes()

View File

@ -0,0 +1,39 @@
# Copyright 2023 Memgraph Ltd.
#
# Use of this software is governed by the Business Source License
# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
# License, and you may not use this file except in compliance with the Business Source License.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0, included in the file
# licenses/APL.txt.
import sys
import pytest
from common import memgraph
def test_label_index_hint(memgraph):
memgraph.execute("CREATE (n:Label1:Label2 {prop: 1});")
memgraph.execute("CREATE INDEX ON :Label1;")
# TODO: Fix this test since it should only filter on :Label2 and prop
expected_explain = [
" * Produce {n}",
" * Filter (n :Label1:Label2), {n.prop}",
" * ScanAllByLabel (n :Label1)",
" * Once",
]
actual_explain = [
row["QUERY PLAN"]
for row in memgraph.execute_and_fetch("EXPLAIN MATCH (n:Label1:Label2) WHERE n.prop = 1 return n;")
]
assert expected_explain == actual_explain
if __name__ == "__main__":
sys.exit(pytest.main([__file__, "-rA"]))

View File

@ -0,0 +1,13 @@
filter_info_cluster: &filter_info_cluster
cluster:
main:
args: ["--bolt-port", "7687", "--log-level=TRACE"]
log_file: "filter_info.log"
setup_queries: []
validation_queries: []
workloads:
- name: "Filter info information"
binary: "tests/e2e/pytest_runner.sh"
args: ["filter_info/filter_info.py"]
<<: *filter_info_cluster

View File

@ -162,12 +162,13 @@ def test_label_property_index_hint(memgraph):
expected_explain_no_hint = [
" * Produce {n}",
" * Filter (n :Label), {n.id1}, {n.id2}",
" * Filter {n.id1}",
" * ScanAllByLabelPropertyValue (n :Label {id2})",
" * Once",
]
expected_explain_with_hint = [
row.replace("(n :Label {id2})", "(n :Label {id1})") for row in expected_explain_no_hint
row.replace("(n :Label {id2})", "(n :Label {id1})").replace(" * Filter {n.id1}", " * Filter {n.id2}")
for row in expected_explain_no_hint
]
explain_no_hint = [
@ -192,7 +193,7 @@ def test_label_property_index_hint_alternative_orderings(memgraph):
expected_explain_with_hint = [
" * Produce {n}",
" * Filter (n :Label), {n.id1}, {n.id2}",
" * Filter {n.id2}",
" * ScanAllByLabelPropertyValue (n :Label {id1})",
" * Once",
]
@ -221,7 +222,7 @@ def test_multiple_label_property_index_hints(memgraph):
expected_explain_with_hint = [
" * Produce {n}",
" * Filter (n :Label), {n.id1}, {n.id2}",
" * Filter {n.id2}",
" * ScanAllByLabelPropertyValue (n :Label {id1})",
" * Once",
]
@ -251,7 +252,7 @@ def test_multiple_applicable_label_property_index_hints(memgraph):
expected_explain_with_hint = [
" * Produce {n}",
" * Filter (n :Label), {n.id1}, {n.id2}",
" * Filter {n.id2}",
" * ScanAllByLabelPropertyValue (n :Label {id1})",
" * Once",
]
@ -275,12 +276,13 @@ def test_multiple_applicable_label_property_index_hints_alternative_orderings(me
expected_explain_with_hint_1 = [
" * Produce {n}",
" * Filter (n :Label), {n.id1}, {n.id2}",
" * Filter {n.id2}",
" * ScanAllByLabelPropertyValue (n :Label {id1})",
" * Once",
]
expected_explain_with_hint_2 = [
row.replace("(n :Label {id1})", "(n :Label {id2})") for row in expected_explain_with_hint_1
row.replace("(n :Label {id1})", "(n :Label {id2})").replace(" * Filter {n.id2}", " * Filter {n.id1}")
for row in expected_explain_with_hint_1
]
explain_with_hint_ordering_1a = [
@ -407,6 +409,7 @@ def test_multiple_match_query(memgraph):
memgraph.execute("CREATE INDEX ON :Label2;")
memgraph.execute("CREATE INDEX ON :Label3;")
# TODO: Fix this test since it has the filtering info wrong (filtering by label that's already indexed)
expected_explain_with_hint = [
" * Produce {n, m}",
" * Cartesian {m : n}",
@ -414,7 +417,7 @@ def test_multiple_match_query(memgraph):
" | * Filter (n :Label1:Label2), {n.id}",
" | * ScanAllByLabel (n :Label1)",
" | * Once",
" * Filter (m :Label2:Label3), (n :Label1:Label2), {n.id}",
" * Filter (m :Label2:Label3)",
" * ScanAllByLabel (m :Label2)",
" * Once",
]