Remove filter profile info (#1481)

2023-12-03 21:23:52 +01:00 · 2023-12-03 21:23:52 +01:00 · d58a464141
commit d58a464141
parent 3ccd78ac71
10 changed files with 116 additions and 19 deletions
--- a/src/query/plan/rewrite/index_lookup.hpp
+++ b/src/query/plan/rewrite/index_lookup.hpp
@ -106,6 +106,11 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
    prev_ops_.pop_back();
    ExpressionRemovalResult removal = RemoveExpressions(op.expression_, filter_exprs_for_removal_);
    op.expression_ = removal.trimmed_expression;
+    if (op.expression_) {
+      Filters leftover_filters;
+      leftover_filters.CollectFilterExpression(op.expression_, *symbol_table_);
+      op.all_filters_ = std::move(leftover_filters);
+    }

    // edge uniqueness filter comes always before filter in plan generation
    LogicalOperator *input = op.input().get();
--- a/src/query/plan/rewrite/join.hpp
+++ b/src/query/plan/rewrite/join.hpp
@ -59,6 +59,12 @@ class JoinRewriter final : public HierarchicalLogicalOperatorVisitor {

    ExpressionRemovalResult removal = RemoveExpressions(op.expression_, filter_exprs_for_removal_);
    op.expression_ = removal.trimmed_expression;
+    if (op.expression_) {
+      Filters leftover_filters;
+      leftover_filters.CollectFilterExpression(op.expression_, *symbol_table_);
+      op.all_filters_ = std::move(leftover_filters);
+    }
+
    if (!op.expression_ || utils::Contains(filter_exprs_for_removal_, op.expression_)) {
      SetOnParent(op.input());
    }
--- a/src/query/plan/rule_based_planner.hpp
+++ b/src/query/plan/rule_based_planner.hpp
@ -897,13 +897,14 @@ class RuleBasedPlanner {
  std::unique_ptr<LogicalOperator> GenFilters(std::unique_ptr<LogicalOperator> last_op,
                                              const std::unordered_set<Symbol> &bound_symbols, Filters &filters,
                                              AstStorage &storage, const SymbolTable &symbol_table) {
-    auto all_filters = filters;
    auto pattern_filters = ExtractPatternFilters(filters, symbol_table, storage, bound_symbols);
    auto *filter_expr = impl::ExtractFilters(bound_symbols, filters, storage);

    if (filter_expr) {
-      last_op =
-          std::make_unique<Filter>(std::move(last_op), std::move(pattern_filters), filter_expr, std::move(all_filters));
+      Filters operator_filters;
+      operator_filters.CollectFilterExpression(filter_expr, symbol_table);
+      last_op = std::make_unique<Filter>(std::move(last_op), std::move(pattern_filters), filter_expr,
+                                         std::move(operator_filters));
    }
    return last_op;
  }
--- a/tests/e2e/CMakeLists.txt
+++ b/tests/e2e/CMakeLists.txt
@ -70,6 +70,7 @@ add_subdirectory(index_hints)
 add_subdirectory(query_modules)
 add_subdirectory(constraints)
 add_subdirectory(inspect_query)
+add_subdirectory(filter_info)
 add_subdirectory(queries)
 add_subdirectory(garbage_collection)

--- a/tests/e2e/analyze_graph/optimize_indexes.py
+++ b/tests/e2e/analyze_graph/optimize_indexes.py
@ -62,7 +62,7 @@ def test_analyze_graph_delete_statistics(delete_query, multi_db):
    # After deleting statistics, id2 should be chosen because it has less vertices
    expected_explain_after_delete_analysis = [
        (f" * Produce {{n}}",),
-        (f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",),
+        (f" * Filter {{n.id1}}",),
        (f" * ScanAllByLabelPropertyValue (n :Label {{id2}})",),
        (f" * Once",),
    ]
@ -96,7 +96,7 @@ def test_analyze_full_graph(analyze_query, multi_db):
    # Choose id2 before tha analysis because it has less vertices
    expected_explain_before_analysis = [
        (f" * Produce {{n}}",),
-        (f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",),
+        (f" * Filter {{n.id1}}",),
        (f" * ScanAllByLabelPropertyValue (n :Label {{id2}})",),
        (f" * Once",),
    ]
@ -117,7 +117,7 @@ def test_analyze_full_graph(analyze_query, multi_db):
    # After analyzing graph, id1 index should be chosen because it has smaller average group size
    expected_explain_after_analysis = [
        (f" * Produce {{n}}",),
-        (f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",),
+        (f" * Filter {{n.id2}}",),
        (f" * ScanAllByLabelPropertyValue (n :Label {{id1}})",),
        (f" * Once",),
    ]
@ -152,7 +152,7 @@ def test_cardinality_different_avg_group_size_uniform_dist(multi_db):
    assert analyze_graph_results[1 - first_index] == ("Label", "id2", 100, 20, 5, 0, 0)
    expected_explain_after_analysis = [
        (f" * Produce {{n}}",),
-        (f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",),
+        (f" * Filter {{n.id2}}",),
        (f" * ScanAllByLabelPropertyValue (n :Label {{id1}})",),
        (f" * Once",),
    ]
@ -183,7 +183,7 @@ def test_cardinality_same_avg_group_size_uniform_dist_diff_vertex_count(multi_db
    assert analyze_graph_results[1 - first_index] == ("Label", "id2", 50, 50, 1, 0, 0)
    expected_explain_after_analysis = [
        (f" * Produce {{n}}",),
-        (f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",),
+        (f" * Filter {{n.id1}}",),
        (f" * ScanAllByLabelPropertyValue (n :Label {{id2}})",),
        (f" * Once",),
    ]
@ -214,7 +214,7 @@ def test_large_diff_in_num_vertices_v1(multi_db):
    assert analyze_graph_results[1 - first_index] == ("Label", "id2", 99, 1, 99, 0, 0)
    expected_explain_after_analysis = [
        (f" * Produce {{n}}",),
-        (f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",),
+        (f" * Filter {{n.id1}}",),
        (f" * ScanAllByLabelPropertyValue (n :Label {{id2}})",),
        (f" * Once",),
    ]
@ -245,7 +245,7 @@ def test_large_diff_in_num_vertices_v2(multi_db):
    assert analyze_graph_results[1 - first_index] == ("Label", "id2", 1000, 1000, 1, 0, 0)
    expected_explain_after_analysis = [
        (f" * Produce {{n}}",),
-        (f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",),
+        (f" * Filter {{n.id2}}",),
        (f" * ScanAllByLabelPropertyValue (n :Label {{id1}})",),
        (f" * Once",),
    ]
@ -286,7 +286,7 @@ def test_same_avg_group_size_diff_distribution(multi_db):
    assert analyze_graph_results[1 - first_index] == ("Label", "id2", 100, 5, 20, 0, 0)
    expected_explain_after_analysis = [
        (f" * Produce {{n}}",),
-        (f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",),
+        (f" * Filter {{n.id1}}",),
        (f" * ScanAllByLabelPropertyValue (n :Label {{id2}})",),
        (f" * Once",),
    ]
--- a/tests/e2e/filter_info/CMakeLists.txt
+++ b/tests/e2e/filter_info/CMakeLists.txt
@ -0,0 +1,6 @@
+function(copy_filter_info_e2e_python_files FILE_NAME)
+    copy_e2e_python_files(filter_info ${FILE_NAME})
+endfunction()
+
+copy_filter_info_e2e_python_files(common.py)
+copy_filter_info_e2e_python_files(filter_info.py)
--- a/tests/e2e/filter_info/common.py
+++ b/tests/e2e/filter_info/common.py
@ -0,0 +1,23 @@
+# Copyright 2023 Memgraph Ltd.
+#
+# Use of this software is governed by the Business Source License
+# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
+# License, and you may not use this file except in compliance with the Business Source License.
+#
+# As of the Change Date specified in that file, in accordance with
+# the Business Source License, use of this software will be governed
+# by the Apache License, Version 2.0, included in the file
+# licenses/APL.txt.
+
+import pytest
+from gqlalchemy import Memgraph
+
+
+@pytest.fixture
+def memgraph(**kwargs) -> Memgraph:
+    memgraph = Memgraph()
+
+    yield memgraph
+
+    memgraph.drop_database()
+    memgraph.drop_indexes()
--- a/tests/e2e/filter_info/filter_info.py
+++ b/tests/e2e/filter_info/filter_info.py
@ -0,0 +1,39 @@
+# Copyright 2023 Memgraph Ltd.
+#
+# Use of this software is governed by the Business Source License
+# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
+# License, and you may not use this file except in compliance with the Business Source License.
+#
+# As of the Change Date specified in that file, in accordance with
+# the Business Source License, use of this software will be governed
+# by the Apache License, Version 2.0, included in the file
+# licenses/APL.txt.
+
+import sys
+
+import pytest
+from common import memgraph
+
+
+def test_label_index_hint(memgraph):
+    memgraph.execute("CREATE (n:Label1:Label2 {prop: 1});")
+    memgraph.execute("CREATE INDEX ON :Label1;")
+
+    # TODO: Fix this test since it should only filter on :Label2 and prop
+    expected_explain = [
+        " * Produce {n}",
+        " * Filter (n :Label1:Label2), {n.prop}",
+        " * ScanAllByLabel (n :Label1)",
+        " * Once",
+    ]
+
+    actual_explain = [
+        row["QUERY PLAN"]
+        for row in memgraph.execute_and_fetch("EXPLAIN MATCH (n:Label1:Label2) WHERE n.prop = 1 return n;")
+    ]
+
+    assert expected_explain == actual_explain
+
+
+if __name__ == "__main__":
+    sys.exit(pytest.main([__file__, "-rA"]))
--- a/tests/e2e/filter_info/workloads.yaml
+++ b/tests/e2e/filter_info/workloads.yaml
@ -0,0 +1,13 @@
+filter_info_cluster: &filter_info_cluster
+  cluster:
+    main:
+      args: ["--bolt-port", "7687", "--log-level=TRACE"]
+      log_file: "filter_info.log"
+      setup_queries: []
+      validation_queries: []
+
+workloads:
+  - name: "Filter info information"
+    binary: "tests/e2e/pytest_runner.sh"
+    args: ["filter_info/filter_info.py"]
+    <<: *filter_info_cluster
--- a/tests/e2e/index_hints/index_hints.py
+++ b/tests/e2e/index_hints/index_hints.py
@ -162,12 +162,13 @@ def test_label_property_index_hint(memgraph):

    expected_explain_no_hint = [
        " * Produce {n}",
-        " * Filter (n :Label), {n.id1}, {n.id2}",
+        " * Filter {n.id1}",
        " * ScanAllByLabelPropertyValue (n :Label {id2})",
        " * Once",
    ]
    expected_explain_with_hint = [
-        row.replace("(n :Label {id2})", "(n :Label {id1})") for row in expected_explain_no_hint
+        row.replace("(n :Label {id2})", "(n :Label {id1})").replace(" * Filter {n.id1}", " * Filter {n.id2}")
+        for row in expected_explain_no_hint
    ]

    explain_no_hint = [
@ -192,7 +193,7 @@ def test_label_property_index_hint_alternative_orderings(memgraph):

    expected_explain_with_hint = [
        " * Produce {n}",
-        " * Filter (n :Label), {n.id1}, {n.id2}",
+        " * Filter {n.id2}",
        " * ScanAllByLabelPropertyValue (n :Label {id1})",
        " * Once",
    ]
@ -221,7 +222,7 @@ def test_multiple_label_property_index_hints(memgraph):

    expected_explain_with_hint = [
        " * Produce {n}",
-        " * Filter (n :Label), {n.id1}, {n.id2}",
+        " * Filter {n.id2}",
        " * ScanAllByLabelPropertyValue (n :Label {id1})",
        " * Once",
    ]
@ -251,7 +252,7 @@ def test_multiple_applicable_label_property_index_hints(memgraph):

    expected_explain_with_hint = [
        " * Produce {n}",
-        " * Filter (n :Label), {n.id1}, {n.id2}",
+        " * Filter {n.id2}",
        " * ScanAllByLabelPropertyValue (n :Label {id1})",
        " * Once",
    ]
@ -275,12 +276,13 @@ def test_multiple_applicable_label_property_index_hints_alternative_orderings(me

    expected_explain_with_hint_1 = [
        " * Produce {n}",
-        " * Filter (n :Label), {n.id1}, {n.id2}",
+        " * Filter {n.id2}",
        " * ScanAllByLabelPropertyValue (n :Label {id1})",
        " * Once",
    ]
    expected_explain_with_hint_2 = [
-        row.replace("(n :Label {id1})", "(n :Label {id2})") for row in expected_explain_with_hint_1
+        row.replace("(n :Label {id1})", "(n :Label {id2})").replace(" * Filter {n.id2}", " * Filter {n.id1}")
+        for row in expected_explain_with_hint_1
    ]

    explain_with_hint_ordering_1a = [
@ -407,6 +409,7 @@ def test_multiple_match_query(memgraph):
    memgraph.execute("CREATE INDEX ON :Label2;")
    memgraph.execute("CREATE INDEX ON :Label3;")

+    # TODO: Fix this test since it has the filtering info wrong (filtering by label that's already indexed)
    expected_explain_with_hint = [
        " * Produce {n, m}",
        " * Cartesian {m : n}",
@ -414,7 +417,7 @@ def test_multiple_match_query(memgraph):
        " | * Filter (n :Label1:Label2), {n.id}",
        " | * ScanAllByLabel (n :Label1)",
        " | * Once",
-        " * Filter (m :Label2:Label3), (n :Label1:Label2), {n.id}",
+        " * Filter (m :Label2:Label3)",
        " * ScanAllByLabel (m :Label2)",
        " * Once",
    ]