From d58a464141ee3265d85b6b3a03e0115316bc4cd0 Mon Sep 17 00:00:00 2001
From: Josipmrden <josip.mrden@memgraph.io>
Date: Sun, 3 Dec 2023 21:23:52 +0100
Subject: [PATCH] Remove filter profile info (#1481)

---
 src/query/plan/rewrite/index_lookup.hpp     |  5 +++
 src/query/plan/rewrite/join.hpp             |  6 ++++
 src/query/plan/rule_based_planner.hpp       |  7 ++--
 tests/e2e/CMakeLists.txt                    |  1 +
 tests/e2e/analyze_graph/optimize_indexes.py | 16 ++++-----
 tests/e2e/filter_info/CMakeLists.txt        |  6 ++++
 tests/e2e/filter_info/common.py             | 23 ++++++++++++
 tests/e2e/filter_info/filter_info.py        | 39 +++++++++++++++++++++
 tests/e2e/filter_info/workloads.yaml        | 13 +++++++
 tests/e2e/index_hints/index_hints.py        | 19 +++++-----
 10 files changed, 116 insertions(+), 19 deletions(-)
 create mode 100644 tests/e2e/filter_info/CMakeLists.txt
 create mode 100644 tests/e2e/filter_info/common.py
 create mode 100644 tests/e2e/filter_info/filter_info.py
 create mode 100644 tests/e2e/filter_info/workloads.yaml
diff --git a/src/query/plan/rewrite/index_lookup.hpp b/src/query/plan/rewrite/index_lookup.hpp
index 7bb88f659..590bad5f4 100644
--- a/src/query/plan/rewrite/index_lookup.hpp
+++ b/src/query/plan/rewrite/index_lookup.hpp
@@ -106,6 +106,11 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
     prev_ops_.pop_back();
     ExpressionRemovalResult removal = RemoveExpressions(op.expression_, filter_exprs_for_removal_);
     op.expression_ = removal.trimmed_expression;
+    if (op.expression_) {
+      Filters leftover_filters;
+      leftover_filters.CollectFilterExpression(op.expression_, *symbol_table_);
+      op.all_filters_ = std::move(leftover_filters);
+    }
 
     // edge uniqueness filter comes always before filter in plan generation
     LogicalOperator *input = op.input().get();
diff --git a/src/query/plan/rewrite/join.hpp b/src/query/plan/rewrite/join.hpp
index 65e32b3e8..e346ded45 100644
--- a/src/query/plan/rewrite/join.hpp
+++ b/src/query/plan/rewrite/join.hpp
@@ -59,6 +59,12 @@ class JoinRewriter final : public HierarchicalLogicalOperatorVisitor {
 
     ExpressionRemovalResult removal = RemoveExpressions(op.expression_, filter_exprs_for_removal_);
     op.expression_ = removal.trimmed_expression;
+    if (op.expression_) {
+      Filters leftover_filters;
+      leftover_filters.CollectFilterExpression(op.expression_, *symbol_table_);
+      op.all_filters_ = std::move(leftover_filters);
+    }
+
     if (!op.expression_ || utils::Contains(filter_exprs_for_removal_, op.expression_)) {
       SetOnParent(op.input());
     }
diff --git a/src/query/plan/rule_based_planner.hpp b/src/query/plan/rule_based_planner.hpp
index b58d0170b..a61658c90 100644
--- a/src/query/plan/rule_based_planner.hpp
+++ b/src/query/plan/rule_based_planner.hpp
@@ -897,13 +897,14 @@ class RuleBasedPlanner {
   std::unique_ptr<LogicalOperator> GenFilters(std::unique_ptr<LogicalOperator> last_op,
                                               const std::unordered_set<Symbol> &bound_symbols, Filters &filters,
                                               AstStorage &storage, const SymbolTable &symbol_table) {
-    auto all_filters = filters;
     auto pattern_filters = ExtractPatternFilters(filters, symbol_table, storage, bound_symbols);
     auto *filter_expr = impl::ExtractFilters(bound_symbols, filters, storage);
 
     if (filter_expr) {
-      last_op =
-          std::make_unique<Filter>(std::move(last_op), std::move(pattern_filters), filter_expr, std::move(all_filters));
+      Filters operator_filters;
+      operator_filters.CollectFilterExpression(filter_expr, symbol_table);
+      last_op = std::make_unique<Filter>(std::move(last_op), std::move(pattern_filters), filter_expr,
+                                         std::move(operator_filters));
     }
     return last_op;
   }
diff --git a/tests/e2e/CMakeLists.txt b/tests/e2e/CMakeLists.txt
index 28fe94559..da96f6d4a 100644
--- a/tests/e2e/CMakeLists.txt
+++ b/tests/e2e/CMakeLists.txt
@@ -70,6 +70,7 @@ add_subdirectory(index_hints)
 add_subdirectory(query_modules)
 add_subdirectory(constraints)
 add_subdirectory(inspect_query)
+add_subdirectory(filter_info)
 add_subdirectory(queries)
 add_subdirectory(garbage_collection)
 
diff --git a/tests/e2e/analyze_graph/optimize_indexes.py b/tests/e2e/analyze_graph/optimize_indexes.py
index 325993ca1..c0530a827 100644
--- a/tests/e2e/analyze_graph/optimize_indexes.py
+++ b/tests/e2e/analyze_graph/optimize_indexes.py
@@ -62,7 +62,7 @@ def test_analyze_graph_delete_statistics(delete_query, multi_db):
     # After deleting statistics, id2 should be chosen because it has less vertices
     expected_explain_after_delete_analysis = [
         (f" * Produce {{n}}",),
-        (f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",),
+        (f" * Filter {{n.id1}}",),
         (f" * ScanAllByLabelPropertyValue (n :Label {{id2}})",),
         (f" * Once",),
     ]
@@ -96,7 +96,7 @@ def test_analyze_full_graph(analyze_query, multi_db):
     # Choose id2 before tha analysis because it has less vertices
     expected_explain_before_analysis = [
         (f" * Produce {{n}}",),
-        (f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",),
+        (f" * Filter {{n.id1}}",),
         (f" * ScanAllByLabelPropertyValue (n :Label {{id2}})",),
         (f" * Once",),
     ]
@@ -117,7 +117,7 @@ def test_analyze_full_graph(analyze_query, multi_db):
     # After analyzing graph, id1 index should be chosen because it has smaller average group size
     expected_explain_after_analysis = [
         (f" * Produce {{n}}",),
-        (f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",),
+        (f" * Filter {{n.id2}}",),
         (f" * ScanAllByLabelPropertyValue (n :Label {{id1}})",),
         (f" * Once",),
     ]
@@ -152,7 +152,7 @@ def test_cardinality_different_avg_group_size_uniform_dist(multi_db):
     assert analyze_graph_results[1 - first_index] == ("Label", "id2", 100, 20, 5, 0, 0)
     expected_explain_after_analysis = [
         (f" * Produce {{n}}",),
-        (f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",),
+        (f" * Filter {{n.id2}}",),
         (f" * ScanAllByLabelPropertyValue (n :Label {{id1}})",),
         (f" * Once",),
     ]
@@ -183,7 +183,7 @@ def test_cardinality_same_avg_group_size_uniform_dist_diff_vertex_count(multi_db
     assert analyze_graph_results[1 - first_index] == ("Label", "id2", 50, 50, 1, 0, 0)
     expected_explain_after_analysis = [
         (f" * Produce {{n}}",),
-        (f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",),
+        (f" * Filter {{n.id1}}",),
         (f" * ScanAllByLabelPropertyValue (n :Label {{id2}})",),
         (f" * Once",),
     ]
@@ -214,7 +214,7 @@ def test_large_diff_in_num_vertices_v1(multi_db):
     assert analyze_graph_results[1 - first_index] == ("Label", "id2", 99, 1, 99, 0, 0)
     expected_explain_after_analysis = [
         (f" * Produce {{n}}",),
-        (f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",),
+        (f" * Filter {{n.id1}}",),
         (f" * ScanAllByLabelPropertyValue (n :Label {{id2}})",),
         (f" * Once",),
     ]
@@ -245,7 +245,7 @@ def test_large_diff_in_num_vertices_v2(multi_db):
     assert analyze_graph_results[1 - first_index] == ("Label", "id2", 1000, 1000, 1, 0, 0)
     expected_explain_after_analysis = [
         (f" * Produce {{n}}",),
-        (f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",),
+        (f" * Filter {{n.id2}}",),
         (f" * ScanAllByLabelPropertyValue (n :Label {{id1}})",),
         (f" * Once",),
     ]
@@ -286,7 +286,7 @@ def test_same_avg_group_size_diff_distribution(multi_db):
     assert analyze_graph_results[1 - first_index] == ("Label", "id2", 100, 5, 20, 0, 0)
     expected_explain_after_analysis = [
         (f" * Produce {{n}}",),
-        (f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",),
+        (f" * Filter {{n.id1}}",),
         (f" * ScanAllByLabelPropertyValue (n :Label {{id2}})",),
         (f" * Once",),
     ]
diff --git a/tests/e2e/filter_info/CMakeLists.txt b/tests/e2e/filter_info/CMakeLists.txt
new file mode 100644
index 000000000..401ec46a6
--- /dev/null
+++ b/tests/e2e/filter_info/CMakeLists.txt
@@ -0,0 +1,6 @@
+function(copy_filter_info_e2e_python_files FILE_NAME)
+    copy_e2e_python_files(filter_info ${FILE_NAME})
+endfunction()
+
+copy_filter_info_e2e_python_files(common.py)
+copy_filter_info_e2e_python_files(filter_info.py)
diff --git a/tests/e2e/filter_info/common.py b/tests/e2e/filter_info/common.py
new file mode 100644
index 000000000..1eb81a94b
--- /dev/null
+++ b/tests/e2e/filter_info/common.py
@@ -0,0 +1,23 @@
+# Copyright 2023 Memgraph Ltd.
+#
+# Use of this software is governed by the Business Source License
+# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
+# License, and you may not use this file except in compliance with the Business Source License.
+#
+# As of the Change Date specified in that file, in accordance with
+# the Business Source License, use of this software will be governed
+# by the Apache License, Version 2.0, included in the file
+# licenses/APL.txt.
+
+import pytest
+from gqlalchemy import Memgraph
+
+
+@pytest.fixture
+def memgraph(**kwargs) -> Memgraph:
+    memgraph = Memgraph()
+
+    yield memgraph
+
+    memgraph.drop_database()
+    memgraph.drop_indexes()
diff --git a/tests/e2e/filter_info/filter_info.py b/tests/e2e/filter_info/filter_info.py
new file mode 100644
index 000000000..53bc737ea
--- /dev/null
+++ b/tests/e2e/filter_info/filter_info.py
@@ -0,0 +1,39 @@
+# Copyright 2023 Memgraph Ltd.
+#
+# Use of this software is governed by the Business Source License
+# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
+# License, and you may not use this file except in compliance with the Business Source License.
+#
+# As of the Change Date specified in that file, in accordance with
+# the Business Source License, use of this software will be governed
+# by the Apache License, Version 2.0, included in the file
+# licenses/APL.txt.
+
+import sys
+
+import pytest
+from common import memgraph
+
+
+def test_label_index_hint(memgraph):
+    memgraph.execute("CREATE (n:Label1:Label2 {prop: 1});")
+    memgraph.execute("CREATE INDEX ON :Label1;")
+
+    # TODO: Fix this test since it should only filter on :Label2 and prop
+    expected_explain = [
+        " * Produce {n}",
+        " * Filter (n :Label1:Label2), {n.prop}",
+        " * ScanAllByLabel (n :Label1)",
+        " * Once",
+    ]
+
+    actual_explain = [
+        row["QUERY PLAN"]
+        for row in memgraph.execute_and_fetch("EXPLAIN MATCH (n:Label1:Label2) WHERE n.prop = 1 return n;")
+    ]
+
+    assert expected_explain == actual_explain
+
+
+if __name__ == "__main__":
+    sys.exit(pytest.main([__file__, "-rA"]))
diff --git a/tests/e2e/filter_info/workloads.yaml b/tests/e2e/filter_info/workloads.yaml
new file mode 100644
index 000000000..69c98f741
--- /dev/null
+++ b/tests/e2e/filter_info/workloads.yaml
@@ -0,0 +1,13 @@
+filter_info_cluster: &filter_info_cluster
+  cluster:
+    main:
+      args: ["--bolt-port", "7687", "--log-level=TRACE"]
+      log_file: "filter_info.log"
+      setup_queries: []
+      validation_queries: []
+
+workloads:
+  - name: "Filter info information"
+    binary: "tests/e2e/pytest_runner.sh"
+    args: ["filter_info/filter_info.py"]
+    <<: *filter_info_cluster
diff --git a/tests/e2e/index_hints/index_hints.py b/tests/e2e/index_hints/index_hints.py
index 85b63a84b..70d3ce6b6 100644
--- a/tests/e2e/index_hints/index_hints.py
+++ b/tests/e2e/index_hints/index_hints.py
@@ -162,12 +162,13 @@ def test_label_property_index_hint(memgraph):
 
     expected_explain_no_hint = [
         " * Produce {n}",
-        " * Filter (n :Label), {n.id1}, {n.id2}",
+        " * Filter {n.id1}",
         " * ScanAllByLabelPropertyValue (n :Label {id2})",
         " * Once",
     ]
     expected_explain_with_hint = [
-        row.replace("(n :Label {id2})", "(n :Label {id1})") for row in expected_explain_no_hint
+        row.replace("(n :Label {id2})", "(n :Label {id1})").replace(" * Filter {n.id1}", " * Filter {n.id2}")
+        for row in expected_explain_no_hint
     ]
 
     explain_no_hint = [
@@ -192,7 +193,7 @@ def test_label_property_index_hint_alternative_orderings(memgraph):
 
     expected_explain_with_hint = [
         " * Produce {n}",
-        " * Filter (n :Label), {n.id1}, {n.id2}",
+        " * Filter {n.id2}",
         " * ScanAllByLabelPropertyValue (n :Label {id1})",
         " * Once",
     ]
@@ -221,7 +222,7 @@ def test_multiple_label_property_index_hints(memgraph):
 
     expected_explain_with_hint = [
         " * Produce {n}",
-        " * Filter (n :Label), {n.id1}, {n.id2}",
+        " * Filter {n.id2}",
         " * ScanAllByLabelPropertyValue (n :Label {id1})",
         " * Once",
     ]
@@ -251,7 +252,7 @@ def test_multiple_applicable_label_property_index_hints(memgraph):
 
     expected_explain_with_hint = [
         " * Produce {n}",
-        " * Filter (n :Label), {n.id1}, {n.id2}",
+        " * Filter {n.id2}",
         " * ScanAllByLabelPropertyValue (n :Label {id1})",
         " * Once",
     ]
@@ -275,12 +276,13 @@ def test_multiple_applicable_label_property_index_hints_alternative_orderings(me
 
     expected_explain_with_hint_1 = [
         " * Produce {n}",
-        " * Filter (n :Label), {n.id1}, {n.id2}",
+        " * Filter {n.id2}",
         " * ScanAllByLabelPropertyValue (n :Label {id1})",
         " * Once",
     ]
     expected_explain_with_hint_2 = [
-        row.replace("(n :Label {id1})", "(n :Label {id2})") for row in expected_explain_with_hint_1
+        row.replace("(n :Label {id1})", "(n :Label {id2})").replace(" * Filter {n.id2}", " * Filter {n.id1}")
+        for row in expected_explain_with_hint_1
     ]
 
     explain_with_hint_ordering_1a = [
@@ -407,6 +409,7 @@ def test_multiple_match_query(memgraph):
     memgraph.execute("CREATE INDEX ON :Label2;")
     memgraph.execute("CREATE INDEX ON :Label3;")
 
+    # TODO: Fix this test since it has the filtering info wrong (filtering by label that's already indexed)
     expected_explain_with_hint = [
         " * Produce {n, m}",
         " * Cartesian {m : n}",
@@ -414,7 +417,7 @@ def test_multiple_match_query(memgraph):
         " | * Filter (n :Label1:Label2), {n.id}",
         " | * ScanAllByLabel (n :Label1)",
         " | * Once",
-        " * Filter (m :Label2:Label3), (n :Label1:Label2), {n.id}",
+        " * Filter (m :Label2:Label3)",
         " * ScanAllByLabel (m :Label2)",
         " * Once",
     ]