From d58a464141ee3265d85b6b3a03e0115316bc4cd0 Mon Sep 17 00:00:00 2001 From: Josipmrden Date: Sun, 3 Dec 2023 21:23:52 +0100 Subject: [PATCH] Remove filter profile info (#1481) --- src/query/plan/rewrite/index_lookup.hpp | 5 +++ src/query/plan/rewrite/join.hpp | 6 ++++ src/query/plan/rule_based_planner.hpp | 7 ++-- tests/e2e/CMakeLists.txt | 1 + tests/e2e/analyze_graph/optimize_indexes.py | 16 ++++----- tests/e2e/filter_info/CMakeLists.txt | 6 ++++ tests/e2e/filter_info/common.py | 23 ++++++++++++ tests/e2e/filter_info/filter_info.py | 39 +++++++++++++++++++++ tests/e2e/filter_info/workloads.yaml | 13 +++++++ tests/e2e/index_hints/index_hints.py | 19 +++++----- 10 files changed, 116 insertions(+), 19 deletions(-) create mode 100644 tests/e2e/filter_info/CMakeLists.txt create mode 100644 tests/e2e/filter_info/common.py create mode 100644 tests/e2e/filter_info/filter_info.py create mode 100644 tests/e2e/filter_info/workloads.yaml diff --git a/src/query/plan/rewrite/index_lookup.hpp b/src/query/plan/rewrite/index_lookup.hpp index 7bb88f659..590bad5f4 100644 --- a/src/query/plan/rewrite/index_lookup.hpp +++ b/src/query/plan/rewrite/index_lookup.hpp @@ -106,6 +106,11 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor { prev_ops_.pop_back(); ExpressionRemovalResult removal = RemoveExpressions(op.expression_, filter_exprs_for_removal_); op.expression_ = removal.trimmed_expression; + if (op.expression_) { + Filters leftover_filters; + leftover_filters.CollectFilterExpression(op.expression_, *symbol_table_); + op.all_filters_ = std::move(leftover_filters); + } // edge uniqueness filter comes always before filter in plan generation LogicalOperator *input = op.input().get(); diff --git a/src/query/plan/rewrite/join.hpp b/src/query/plan/rewrite/join.hpp index 65e32b3e8..e346ded45 100644 --- a/src/query/plan/rewrite/join.hpp +++ b/src/query/plan/rewrite/join.hpp @@ -59,6 +59,12 @@ class JoinRewriter final : public HierarchicalLogicalOperatorVisitor { ExpressionRemovalResult removal = RemoveExpressions(op.expression_, filter_exprs_for_removal_); op.expression_ = removal.trimmed_expression; + if (op.expression_) { + Filters leftover_filters; + leftover_filters.CollectFilterExpression(op.expression_, *symbol_table_); + op.all_filters_ = std::move(leftover_filters); + } + if (!op.expression_ || utils::Contains(filter_exprs_for_removal_, op.expression_)) { SetOnParent(op.input()); } diff --git a/src/query/plan/rule_based_planner.hpp b/src/query/plan/rule_based_planner.hpp index b58d0170b..a61658c90 100644 --- a/src/query/plan/rule_based_planner.hpp +++ b/src/query/plan/rule_based_planner.hpp @@ -897,13 +897,14 @@ class RuleBasedPlanner { std::unique_ptr GenFilters(std::unique_ptr last_op, const std::unordered_set &bound_symbols, Filters &filters, AstStorage &storage, const SymbolTable &symbol_table) { - auto all_filters = filters; auto pattern_filters = ExtractPatternFilters(filters, symbol_table, storage, bound_symbols); auto *filter_expr = impl::ExtractFilters(bound_symbols, filters, storage); if (filter_expr) { - last_op = - std::make_unique(std::move(last_op), std::move(pattern_filters), filter_expr, std::move(all_filters)); + Filters operator_filters; + operator_filters.CollectFilterExpression(filter_expr, symbol_table); + last_op = std::make_unique(std::move(last_op), std::move(pattern_filters), filter_expr, + std::move(operator_filters)); } return last_op; } diff --git a/tests/e2e/CMakeLists.txt b/tests/e2e/CMakeLists.txt index 28fe94559..da96f6d4a 100644 --- a/tests/e2e/CMakeLists.txt +++ b/tests/e2e/CMakeLists.txt @@ -70,6 +70,7 @@ add_subdirectory(index_hints) add_subdirectory(query_modules) add_subdirectory(constraints) add_subdirectory(inspect_query) +add_subdirectory(filter_info) add_subdirectory(queries) add_subdirectory(garbage_collection) diff --git a/tests/e2e/analyze_graph/optimize_indexes.py b/tests/e2e/analyze_graph/optimize_indexes.py index 325993ca1..c0530a827 100644 --- a/tests/e2e/analyze_graph/optimize_indexes.py +++ b/tests/e2e/analyze_graph/optimize_indexes.py @@ -62,7 +62,7 @@ def test_analyze_graph_delete_statistics(delete_query, multi_db): # After deleting statistics, id2 should be chosen because it has less vertices expected_explain_after_delete_analysis = [ (f" * Produce {{n}}",), - (f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",), + (f" * Filter {{n.id1}}",), (f" * ScanAllByLabelPropertyValue (n :Label {{id2}})",), (f" * Once",), ] @@ -96,7 +96,7 @@ def test_analyze_full_graph(analyze_query, multi_db): # Choose id2 before tha analysis because it has less vertices expected_explain_before_analysis = [ (f" * Produce {{n}}",), - (f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",), + (f" * Filter {{n.id1}}",), (f" * ScanAllByLabelPropertyValue (n :Label {{id2}})",), (f" * Once",), ] @@ -117,7 +117,7 @@ def test_analyze_full_graph(analyze_query, multi_db): # After analyzing graph, id1 index should be chosen because it has smaller average group size expected_explain_after_analysis = [ (f" * Produce {{n}}",), - (f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",), + (f" * Filter {{n.id2}}",), (f" * ScanAllByLabelPropertyValue (n :Label {{id1}})",), (f" * Once",), ] @@ -152,7 +152,7 @@ def test_cardinality_different_avg_group_size_uniform_dist(multi_db): assert analyze_graph_results[1 - first_index] == ("Label", "id2", 100, 20, 5, 0, 0) expected_explain_after_analysis = [ (f" * Produce {{n}}",), - (f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",), + (f" * Filter {{n.id2}}",), (f" * ScanAllByLabelPropertyValue (n :Label {{id1}})",), (f" * Once",), ] @@ -183,7 +183,7 @@ def test_cardinality_same_avg_group_size_uniform_dist_diff_vertex_count(multi_db assert analyze_graph_results[1 - first_index] == ("Label", "id2", 50, 50, 1, 0, 0) expected_explain_after_analysis = [ (f" * Produce {{n}}",), - (f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",), + (f" * Filter {{n.id1}}",), (f" * ScanAllByLabelPropertyValue (n :Label {{id2}})",), (f" * Once",), ] @@ -214,7 +214,7 @@ def test_large_diff_in_num_vertices_v1(multi_db): assert analyze_graph_results[1 - first_index] == ("Label", "id2", 99, 1, 99, 0, 0) expected_explain_after_analysis = [ (f" * Produce {{n}}",), - (f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",), + (f" * Filter {{n.id1}}",), (f" * ScanAllByLabelPropertyValue (n :Label {{id2}})",), (f" * Once",), ] @@ -245,7 +245,7 @@ def test_large_diff_in_num_vertices_v2(multi_db): assert analyze_graph_results[1 - first_index] == ("Label", "id2", 1000, 1000, 1, 0, 0) expected_explain_after_analysis = [ (f" * Produce {{n}}",), - (f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",), + (f" * Filter {{n.id2}}",), (f" * ScanAllByLabelPropertyValue (n :Label {{id1}})",), (f" * Once",), ] @@ -286,7 +286,7 @@ def test_same_avg_group_size_diff_distribution(multi_db): assert analyze_graph_results[1 - first_index] == ("Label", "id2", 100, 5, 20, 0, 0) expected_explain_after_analysis = [ (f" * Produce {{n}}",), - (f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",), + (f" * Filter {{n.id1}}",), (f" * ScanAllByLabelPropertyValue (n :Label {{id2}})",), (f" * Once",), ] diff --git a/tests/e2e/filter_info/CMakeLists.txt b/tests/e2e/filter_info/CMakeLists.txt new file mode 100644 index 000000000..401ec46a6 --- /dev/null +++ b/tests/e2e/filter_info/CMakeLists.txt @@ -0,0 +1,6 @@ +function(copy_filter_info_e2e_python_files FILE_NAME) + copy_e2e_python_files(filter_info ${FILE_NAME}) +endfunction() + +copy_filter_info_e2e_python_files(common.py) +copy_filter_info_e2e_python_files(filter_info.py) diff --git a/tests/e2e/filter_info/common.py b/tests/e2e/filter_info/common.py new file mode 100644 index 000000000..1eb81a94b --- /dev/null +++ b/tests/e2e/filter_info/common.py @@ -0,0 +1,23 @@ +# Copyright 2023 Memgraph Ltd. +# +# Use of this software is governed by the Business Source License +# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +# License, and you may not use this file except in compliance with the Business Source License. +# +# As of the Change Date specified in that file, in accordance with +# the Business Source License, use of this software will be governed +# by the Apache License, Version 2.0, included in the file +# licenses/APL.txt. + +import pytest +from gqlalchemy import Memgraph + + +@pytest.fixture +def memgraph(**kwargs) -> Memgraph: + memgraph = Memgraph() + + yield memgraph + + memgraph.drop_database() + memgraph.drop_indexes() diff --git a/tests/e2e/filter_info/filter_info.py b/tests/e2e/filter_info/filter_info.py new file mode 100644 index 000000000..53bc737ea --- /dev/null +++ b/tests/e2e/filter_info/filter_info.py @@ -0,0 +1,39 @@ +# Copyright 2023 Memgraph Ltd. +# +# Use of this software is governed by the Business Source License +# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source +# License, and you may not use this file except in compliance with the Business Source License. +# +# As of the Change Date specified in that file, in accordance with +# the Business Source License, use of this software will be governed +# by the Apache License, Version 2.0, included in the file +# licenses/APL.txt. + +import sys + +import pytest +from common import memgraph + + +def test_label_index_hint(memgraph): + memgraph.execute("CREATE (n:Label1:Label2 {prop: 1});") + memgraph.execute("CREATE INDEX ON :Label1;") + + # TODO: Fix this test since it should only filter on :Label2 and prop + expected_explain = [ + " * Produce {n}", + " * Filter (n :Label1:Label2), {n.prop}", + " * ScanAllByLabel (n :Label1)", + " * Once", + ] + + actual_explain = [ + row["QUERY PLAN"] + for row in memgraph.execute_and_fetch("EXPLAIN MATCH (n:Label1:Label2) WHERE n.prop = 1 return n;") + ] + + assert expected_explain == actual_explain + + +if __name__ == "__main__": + sys.exit(pytest.main([__file__, "-rA"])) diff --git a/tests/e2e/filter_info/workloads.yaml b/tests/e2e/filter_info/workloads.yaml new file mode 100644 index 000000000..69c98f741 --- /dev/null +++ b/tests/e2e/filter_info/workloads.yaml @@ -0,0 +1,13 @@ +filter_info_cluster: &filter_info_cluster + cluster: + main: + args: ["--bolt-port", "7687", "--log-level=TRACE"] + log_file: "filter_info.log" + setup_queries: [] + validation_queries: [] + +workloads: + - name: "Filter info information" + binary: "tests/e2e/pytest_runner.sh" + args: ["filter_info/filter_info.py"] + <<: *filter_info_cluster diff --git a/tests/e2e/index_hints/index_hints.py b/tests/e2e/index_hints/index_hints.py index 85b63a84b..70d3ce6b6 100644 --- a/tests/e2e/index_hints/index_hints.py +++ b/tests/e2e/index_hints/index_hints.py @@ -162,12 +162,13 @@ def test_label_property_index_hint(memgraph): expected_explain_no_hint = [ " * Produce {n}", - " * Filter (n :Label), {n.id1}, {n.id2}", + " * Filter {n.id1}", " * ScanAllByLabelPropertyValue (n :Label {id2})", " * Once", ] expected_explain_with_hint = [ - row.replace("(n :Label {id2})", "(n :Label {id1})") for row in expected_explain_no_hint + row.replace("(n :Label {id2})", "(n :Label {id1})").replace(" * Filter {n.id1}", " * Filter {n.id2}") + for row in expected_explain_no_hint ] explain_no_hint = [ @@ -192,7 +193,7 @@ def test_label_property_index_hint_alternative_orderings(memgraph): expected_explain_with_hint = [ " * Produce {n}", - " * Filter (n :Label), {n.id1}, {n.id2}", + " * Filter {n.id2}", " * ScanAllByLabelPropertyValue (n :Label {id1})", " * Once", ] @@ -221,7 +222,7 @@ def test_multiple_label_property_index_hints(memgraph): expected_explain_with_hint = [ " * Produce {n}", - " * Filter (n :Label), {n.id1}, {n.id2}", + " * Filter {n.id2}", " * ScanAllByLabelPropertyValue (n :Label {id1})", " * Once", ] @@ -251,7 +252,7 @@ def test_multiple_applicable_label_property_index_hints(memgraph): expected_explain_with_hint = [ " * Produce {n}", - " * Filter (n :Label), {n.id1}, {n.id2}", + " * Filter {n.id2}", " * ScanAllByLabelPropertyValue (n :Label {id1})", " * Once", ] @@ -275,12 +276,13 @@ def test_multiple_applicable_label_property_index_hints_alternative_orderings(me expected_explain_with_hint_1 = [ " * Produce {n}", - " * Filter (n :Label), {n.id1}, {n.id2}", + " * Filter {n.id2}", " * ScanAllByLabelPropertyValue (n :Label {id1})", " * Once", ] expected_explain_with_hint_2 = [ - row.replace("(n :Label {id1})", "(n :Label {id2})") for row in expected_explain_with_hint_1 + row.replace("(n :Label {id1})", "(n :Label {id2})").replace(" * Filter {n.id2}", " * Filter {n.id1}") + for row in expected_explain_with_hint_1 ] explain_with_hint_ordering_1a = [ @@ -407,6 +409,7 @@ def test_multiple_match_query(memgraph): memgraph.execute("CREATE INDEX ON :Label2;") memgraph.execute("CREATE INDEX ON :Label3;") + # TODO: Fix this test since it has the filtering info wrong (filtering by label that's already indexed) expected_explain_with_hint = [ " * Produce {n, m}", " * Cartesian {m : n}", @@ -414,7 +417,7 @@ def test_multiple_match_query(memgraph): " | * Filter (n :Label1:Label2), {n.id}", " | * ScanAllByLabel (n :Label1)", " | * Once", - " * Filter (m :Label2:Label3), (n :Label1:Label2), {n.id}", + " * Filter (m :Label2:Label3)", " * ScanAllByLabel (m :Label2)", " * Once", ]