Merge branch 'master' into Implement-constant-time-label-and-edge-type-retrieval

gvolfing 2023-12-04 08:00:02 +01:00
commit 31efe28878
18 changed files with 324 additions and 37 deletions

View File

@@ -188,6 +188,8 @@ class ExpressionEvaluator : public ExpressionVisitor<TypedValue> {
utils::MemoryResource *GetMemoryResource() const { return ctx_->memory; }
void ResetPropertyLookupCache() { property_lookup_cache_.clear(); }
TypedValue Visit(NamedExpression &named_expression) override {
const auto &symbol = symbol_table_->at(named_expression);
auto value = named_expression.expression_->Accept(*this);
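The new ResetPropertyLookupCache hook implies the evaluator memoizes property lookups, so repeated accesses to the same property within one row hit the cache instead of storage. A minimal Python sketch of the idea, with hypothetical names (this is not Memgraph's actual API):

# Minimal sketch of a per-row property-lookup cache (hypothetical names).
# Repeated lookups of the same property on the same vertex hit the dict
# instead of storage; the cache is cleared between rows.
class Evaluator:
    def __init__(self, storage):
        self.storage = storage  # e.g. dict: (vertex_id, prop) -> value
        self._property_cache = {}

    def reset_property_lookup_cache(self):
        self._property_cache.clear()

    def property_lookup(self, vertex_id, prop):
        key = (vertex_id, prop)
        if key not in self._property_cache:
            self._property_cache[key] = self.storage.get(key)
        return self._property_cache[key]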

View File

@@ -3676,6 +3676,7 @@ class AggregateCursor : public Cursor {
void ProcessOne(const Frame &frame, ExpressionEvaluator *evaluator) {
// Preallocated group_by, since most of the time the aggregation key won't be unique
reused_group_by_.clear();
evaluator->ResetPropertyLookupCache();
for (Expression *expression : self_.group_by_) {
reused_group_by_.emplace_back(expression->Accept(*evaluator));
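ProcessOne clears the lookup cache once per input row, so cached property values never leak across rows, while reused_group_by_ avoids reallocating the key vector on every row. A sketch of the same per-row pattern, again with hypothetical names:

# Sketch of the per-row pattern above (hypothetical names): one key buffer
# is reused across rows, and the evaluator's cache is reset for every row.
def process_rows(rows, group_by_exprs, evaluator):
    groups = {}
    reused_key = []  # preallocated and reused, like reused_group_by_
    for row in rows:
        reused_key.clear()
        evaluator.reset_property_lookup_cache()  # no stale values across rows
        for expr in group_by_exprs:
            reused_key.append(expr(row, evaluator))
        groups.setdefault(tuple(reused_key), []).append(row)
    return groups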

View File

@@ -106,6 +106,11 @@ class IndexLookupRewriter final : public HierarchicalLogicalOperatorVisitor {
prev_ops_.pop_back();
ExpressionRemovalResult removal = RemoveExpressions(op.expression_, filter_exprs_for_removal_);
op.expression_ = removal.trimmed_expression;
if (op.expression_) {
Filters leftover_filters;
leftover_filters.CollectFilterExpression(op.expression_, *symbol_table_);
op.all_filters_ = std::move(leftover_filters);
}
// edge uniqueness filter always comes before filter in plan generation
LogicalOperator *input = op.input().get();
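The added block rebuilds op.all_filters_ from the trimmed expression, so the operator's reported filter metadata matches only the predicates it still evaluates after the index rewrite (JoinRewriter below applies the same recollection). A minimal sketch of trimming a conjunction and keeping the residue, using a hypothetical list-of-atoms representation:

# Hypothetical sketch: model a filter as a conjunction (list of atoms).
# Atoms absorbed by an index scan are removed; the metadata is rebuilt
# from the residual atoms alone, mirroring CollectFilterExpression above.
def trim_and_recollect(filter_atoms, covered_by_index):
    residual = [a for a in filter_atoms if a not in covered_by_index]
    return residual or None  # None: the index absorbed the whole filter

atoms = ["n :Label", "n.id1 = 3", "n.id2 = 3"]
print(trim_and_recollect(atoms, {"n :Label", "n.id2 = 3"}))  # ['n.id1 = 3']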

View File

@@ -59,6 +59,12 @@ class JoinRewriter final : public HierarchicalLogicalOperatorVisitor {
ExpressionRemovalResult removal = RemoveExpressions(op.expression_, filter_exprs_for_removal_);
op.expression_ = removal.trimmed_expression;
if (op.expression_) {
Filters leftover_filters;
leftover_filters.CollectFilterExpression(op.expression_, *symbol_table_);
op.all_filters_ = std::move(leftover_filters);
}
if (!op.expression_ || utils::Contains(filter_exprs_for_removal_, op.expression_)) {
SetOnParent(op.input());
}

View File

@@ -511,10 +511,6 @@ class RuleBasedPlanner {
std::set<ExpansionGroupId> visited_expansion_groups;
last_op =
GenerateExpansionOnAlreadySeenSymbols(std::move(last_op), matching, visited_expansion_groups, symbol_table,
storage, bound_symbols, new_symbols, named_paths, filters, view);
// We want to create separate branches of scan operators for each expansion group of patterns
// Whenever there are 2 scan branches, they will be joined with a Cartesian operator
@@ -528,6 +524,14 @@ class RuleBasedPlanner {
continue;
}
last_op =
GenerateExpansionOnAlreadySeenSymbols(std::move(last_op), matching, visited_expansion_groups, symbol_table,
storage, bound_symbols, new_symbols, named_paths, filters, view);
if (visited_expansion_groups.contains(expansion.expansion_group_id)) {
continue;
}
std::unique_ptr<LogicalOperator> starting_expansion_operator = nullptr;
if (!initial_expansion_done) {
starting_expansion_operator = std::move(last_op);
@@ -897,13 +901,14 @@ class RuleBasedPlanner {
std::unique_ptr<LogicalOperator> GenFilters(std::unique_ptr<LogicalOperator> last_op,
const std::unordered_set<Symbol> &bound_symbols, Filters &filters,
AstStorage &storage, const SymbolTable &symbol_table) {
auto all_filters = filters;
auto pattern_filters = ExtractPatternFilters(filters, symbol_table, storage, bound_symbols);
auto *filter_expr = impl::ExtractFilters(bound_symbols, filters, storage);
if (filter_expr) {
last_op =
std::make_unique<Filter>(std::move(last_op), std::move(pattern_filters), filter_expr, std::move(all_filters));
Filters operator_filters;
operator_filters.CollectFilterExpression(filter_expr, symbol_table);
last_op = std::make_unique<Filter>(std::move(last_op), std::move(pattern_filters), filter_expr,
std::move(operator_filters));
}
return last_op;
}
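GenFilters previously attached a copy of every pending filter to the new Filter operator; it now collects metadata only from the expression that operator actually evaluates. On the plans asserted in this commit's tests, the effect looks like this (plan rows taken from the tests below):

# Before vs. after, using the plan rows from the e2e tests in this commit:
# the Filter operator's metadata used to repeat predicates the index scan
# had already absorbed; now it lists only what the operator evaluates.
before = [" * Filter (n :Label), {n.id1}, {n.id2}",
          " * ScanAllByLabelPropertyValue (n :Label {id2})"]
after = [" * Filter {n.id1}",
         " * ScanAllByLabelPropertyValue (n :Label {id2})"]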

View File

@@ -71,8 +71,10 @@ add_subdirectory(index_hints)
add_subdirectory(query_modules)
add_subdirectory(constraints)
add_subdirectory(inspect_query)
add_subdirectory(filter_info)
add_subdirectory(queries)
add_subdirectory(garbage_collection)
add_subdirectory(query_planning)
copy_e2e_python_files(pytest_runner pytest_runner.sh "")
copy_e2e_python_files(x x.sh "")

View File

@@ -62,7 +62,7 @@ def test_analyze_graph_delete_statistics(delete_query, multi_db):
# After deleting statistics, id2 should be chosen because it has fewer vertices
expected_explain_after_delete_analysis = [
(f" * Produce {{n}}",),
(f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",),
(f" * Filter {{n.id1}}",),
(f" * ScanAllByLabelPropertyValue (n :Label {{id2}})",),
(f" * Once",),
]
@@ -96,7 +96,7 @@ def test_analyze_full_graph(analyze_query, multi_db):
# Choose id2 before the analysis because it has fewer vertices
expected_explain_before_analysis = [
(f" * Produce {{n}}",),
(f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",),
(f" * Filter {{n.id1}}",),
(f" * ScanAllByLabelPropertyValue (n :Label {{id2}})",),
(f" * Once",),
]
@@ -117,7 +117,7 @@ def test_analyze_full_graph(analyze_query, multi_db):
# After analyzing the graph, the id1 index should be chosen because it has a smaller average group size
expected_explain_after_analysis = [
(f" * Produce {{n}}",),
(f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",),
(f" * Filter {{n.id2}}",),
(f" * ScanAllByLabelPropertyValue (n :Label {{id1}})",),
(f" * Once",),
]
@@ -152,7 +152,7 @@ def test_cardinality_different_avg_group_size_uniform_dist(multi_db):
assert analyze_graph_results[1 - first_index] == ("Label", "id2", 100, 20, 5, 0, 0)
expected_explain_after_analysis = [
(f" * Produce {{n}}",),
(f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",),
(f" * Filter {{n.id2}}",),
(f" * ScanAllByLabelPropertyValue (n :Label {{id1}})",),
(f" * Once",),
]
@@ -183,7 +183,7 @@ def test_cardinality_same_avg_group_size_uniform_dist_diff_vertex_count(multi_db
assert analyze_graph_results[1 - first_index] == ("Label", "id2", 50, 50, 1, 0, 0)
expected_explain_after_analysis = [
(f" * Produce {{n}}",),
(f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",),
(f" * Filter {{n.id1}}",),
(f" * ScanAllByLabelPropertyValue (n :Label {{id2}})",),
(f" * Once",),
]
@@ -214,7 +214,7 @@ def test_large_diff_in_num_vertices_v1(multi_db):
assert analyze_graph_results[1 - first_index] == ("Label", "id2", 99, 1, 99, 0, 0)
expected_explain_after_analysis = [
(f" * Produce {{n}}",),
(f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",),
(f" * Filter {{n.id1}}",),
(f" * ScanAllByLabelPropertyValue (n :Label {{id2}})",),
(f" * Once",),
]
@@ -245,7 +245,7 @@ def test_large_diff_in_num_vertices_v2(multi_db):
assert analyze_graph_results[1 - first_index] == ("Label", "id2", 1000, 1000, 1, 0, 0)
expected_explain_after_analysis = [
(f" * Produce {{n}}",),
(f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",),
(f" * Filter {{n.id2}}",),
(f" * ScanAllByLabelPropertyValue (n :Label {{id1}})",),
(f" * Once",),
]
@@ -286,7 +286,7 @@ def test_same_avg_group_size_diff_distribution(multi_db):
assert analyze_graph_results[1 - first_index] == ("Label", "id2", 100, 5, 20, 0, 0)
expected_explain_after_analysis = [
(f" * Produce {{n}}",),
(f" * Filter (n :Label), {{n.id1}}, {{n.id2}}",),
(f" * Filter {{n.id1}}",),
(f" * ScanAllByLabelPropertyValue (n :Label {{id2}})",),
(f" * Once",),
]
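These expectations can be reproduced by hand with the same pattern the e2e tests use, gqlalchemy against a local Memgraph (the exact plan text may vary across versions):

# Reproduces the assertion pattern above: after the change, the Filter row
# lists only the predicate the chosen index did not absorb. Assumes a local
# Memgraph instance; plan text may differ across versions.
from gqlalchemy import Memgraph

memgraph = Memgraph()
memgraph.execute("CREATE INDEX ON :Label(id2);")
plan = [
    row["QUERY PLAN"]
    for row in memgraph.execute_and_fetch(
        "EXPLAIN MATCH (n:Label) WHERE n.id1 = 3 AND n.id2 = 3 RETURN n;"
    )
]
assert any("Filter {n.id1}" in line for line in plan)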

View File

@@ -0,0 +1,6 @@
function(copy_filter_info_e2e_python_files FILE_NAME)
copy_e2e_python_files(filter_info ${FILE_NAME})
endfunction()
copy_filter_info_e2e_python_files(common.py)
copy_filter_info_e2e_python_files(filter_info.py)

View File

@@ -0,0 +1,23 @@
# Copyright 2023 Memgraph Ltd.
#
# Use of this software is governed by the Business Source License
# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
# License, and you may not use this file except in compliance with the Business Source License.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0, included in the file
# licenses/APL.txt.
import pytest
from gqlalchemy import Memgraph
@pytest.fixture
def memgraph(**kwargs) -> Memgraph:
memgraph = Memgraph()
yield memgraph
memgraph.drop_database()
memgraph.drop_indexes()

View File

@@ -0,0 +1,39 @@
# Copyright 2023 Memgraph Ltd.
#
# Use of this software is governed by the Business Source License
# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
# License, and you may not use this file except in compliance with the Business Source License.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0, included in the file
# licenses/APL.txt.
import sys
import pytest
from common import memgraph
def test_label_index_hint(memgraph):
memgraph.execute("CREATE (n:Label1:Label2 {prop: 1});")
memgraph.execute("CREATE INDEX ON :Label1;")
# TODO: Fix this test since it should only filter on :Label2 and prop
expected_explain = [
" * Produce {n}",
" * Filter (n :Label1:Label2), {n.prop}",
" * ScanAllByLabel (n :Label1)",
" * Once",
]
actual_explain = [
row["QUERY PLAN"]
for row in memgraph.execute_and_fetch("EXPLAIN MATCH (n:Label1:Label2) WHERE n.prop = 1 return n;")
]
assert expected_explain == actual_explain
if __name__ == "__main__":
sys.exit(pytest.main([__file__, "-rA"]))
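Per the TODO above, the Filter metadata is still too broad here: ScanAllByLabel (n :Label1) already guarantees :Label1, so only :Label2 and the property check should remain. A hedged sketch of what the corrected expectation would presumably look like:

# Hypothetical expected plan once the TODO is resolved: the label already
# guaranteed by the scan (:Label1) drops out of the Filter metadata.
expected_explain_fixed = [
    " * Produce {n}",
    " * Filter (n :Label2), {n.prop}",
    " * ScanAllByLabel (n :Label1)",
    " * Once",
]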

View File

@@ -0,0 +1,13 @@
filter_info_cluster: &filter_info_cluster
cluster:
main:
args: ["--bolt-port", "7687", "--log-level=TRACE"]
log_file: "filter_info.log"
setup_queries: []
validation_queries: []
workloads:
- name: "Filter info information"
binary: "tests/e2e/pytest_runner.sh"
args: ["filter_info/filter_info.py"]
<<: *filter_info_cluster

View File

@@ -162,12 +162,13 @@ def test_label_property_index_hint(memgraph):
expected_explain_no_hint = [
" * Produce {n}",
" * Filter (n :Label), {n.id1}, {n.id2}",
" * Filter {n.id1}",
" * ScanAllByLabelPropertyValue (n :Label {id2})",
" * Once",
]
expected_explain_with_hint = [
row.replace("(n :Label {id2})", "(n :Label {id1})") for row in expected_explain_no_hint
row.replace("(n :Label {id2})", "(n :Label {id1})").replace(" * Filter {n.id1}", " * Filter {n.id2}")
for row in expected_explain_no_hint
]
explain_no_hint = [
@@ -192,7 +193,7 @@ def test_label_property_index_hint_alternative_orderings(memgraph):
expected_explain_with_hint = [
" * Produce {n}",
" * Filter (n :Label), {n.id1}, {n.id2}",
" * Filter {n.id2}",
" * ScanAllByLabelPropertyValue (n :Label {id1})",
" * Once",
]
@@ -221,7 +222,7 @@ def test_multiple_label_property_index_hints(memgraph):
expected_explain_with_hint = [
" * Produce {n}",
" * Filter (n :Label), {n.id1}, {n.id2}",
" * Filter {n.id2}",
" * ScanAllByLabelPropertyValue (n :Label {id1})",
" * Once",
]
@@ -251,7 +252,7 @@ def test_multiple_applicable_label_property_index_hints(memgraph):
expected_explain_with_hint = [
" * Produce {n}",
" * Filter (n :Label), {n.id1}, {n.id2}",
" * Filter {n.id2}",
" * ScanAllByLabelPropertyValue (n :Label {id1})",
" * Once",
]
@@ -275,12 +276,13 @@ def test_multiple_applicable_label_property_index_hints_alternative_orderings(me
expected_explain_with_hint_1 = [
" * Produce {n}",
" * Filter (n :Label), {n.id1}, {n.id2}",
" * Filter {n.id2}",
" * ScanAllByLabelPropertyValue (n :Label {id1})",
" * Once",
]
expected_explain_with_hint_2 = [
row.replace("(n :Label {id1})", "(n :Label {id2})") for row in expected_explain_with_hint_1
row.replace("(n :Label {id1})", "(n :Label {id2})").replace(" * Filter {n.id2}", " * Filter {n.id1}")
for row in expected_explain_with_hint_1
]
explain_with_hint_ordering_1a = [
@@ -407,6 +409,7 @@ def test_multiple_match_query(memgraph):
memgraph.execute("CREATE INDEX ON :Label2;")
memgraph.execute("CREATE INDEX ON :Label3;")
# TODO: Fix this test since it has the filtering info wrong (filtering by label that's already indexed)
expected_explain_with_hint = [
" * Produce {n, m}",
" * Cartesian {m : n}",
@@ -414,7 +417,7 @@
" | * Filter (n :Label1:Label2), {n.id}",
" | * ScanAllByLabel (n :Label1)",
" | * Once",
" * Filter (m :Label2:Label3), (n :Label1:Label2), {n.id}",
" * Filter (m :Label2:Label3)",
" * ScanAllByLabel (m :Label2)",
" * Once",
]

View File

@@ -0,0 +1,6 @@
function(copy_query_planning_e2e_python_files FILE_NAME)
copy_e2e_python_files(query_planning ${FILE_NAME})
endfunction()
copy_query_planning_e2e_python_files(common.py)
copy_query_planning_e2e_python_files(query_planning_cartesian.py)

View File

@@ -0,0 +1,24 @@
# Copyright 2023 Memgraph Ltd.
#
# Use of this software is governed by the Business Source License
# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
# License, and you may not use this file except in compliance with the Business Source License.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0, included in the file
# licenses/APL.txt.
import pytest
from gqlalchemy import Memgraph
@pytest.fixture
def memgraph(**kwargs) -> Memgraph:
memgraph = Memgraph()
yield memgraph
memgraph.drop_indexes()
memgraph.ensure_constraints([])
memgraph.drop_database()

View File

@@ -0,0 +1,42 @@
# Copyright 2023 Memgraph Ltd.
#
# Use of this software is governed by the Business Source License
# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
# License, and you may not use this file except in compliance with the Business Source License.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0, included in the file
# licenses/APL.txt.
import sys
import pytest
from common import memgraph
QUERY_PLAN = "QUERY PLAN"
def test_indexed_join_with_indices(memgraph):
memgraph.execute("CREATE INDEX ON :Node(id);")
expected_explain = [
f" * Produce {{a, b, r}}",
f" * Filter (a :Node), {{a.id}}",
f" * Expand (b)-[r:EDGE]-(a)",
f" * ScanAllByLabelPropertyValue (b :Node {{id}})",
f" * Once",
]
results = list(
memgraph.execute_and_fetch(
"EXPLAIN MATCH (a:Node {id: 1}) MATCH (b:Node {id: 2}) MATCH (a)-[r:EDGE]-(b) return a,b,r;"
)
)
actual_explain = [x[QUERY_PLAN] for x in results]
assert expected_explain == actual_explain
if __name__ == "__main__":
sys.exit(pytest.main([__file__, "-rA"]))
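There is no Cartesian operator in this plan because the third MATCH connects a and b, letting the planner expand from a single indexed scan; as the planner comment earlier in this commit notes, two disconnected scan branches would instead be joined with a Cartesian operator. Continuing from the test above, a quick way to see the contrast (plan text may vary across versions):

# Contrast: with no pattern connecting a and b, the two scan branches are
# combined by a Cartesian operator rather than an Expand. Reuses `memgraph`
# and QUERY_PLAN from the test above; exact plan text may vary.
rows = memgraph.execute_and_fetch(
    "EXPLAIN MATCH (a:Node {id: 1}) MATCH (b:Node {id: 2}) RETURN a, b;"
)
print([row[QUERY_PLAN] for row in rows])  # expect a " * Cartesian ..." row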

View File

@@ -0,0 +1,14 @@
queries_cluster: &queries_cluster
cluster:
main:
args: ["--bolt-port", "7687", "--log-level=TRACE"]
log_file: "query_planning.log"
setup_queries: []
validation_queries: []
workloads:
- name: "Query planning cartesian"
binary: "tests/e2e/pytest_runner.sh"
args: ["query_planning/query_planning_cartesian.py"]
<<: *queries_cluster

View File

@@ -410,19 +410,67 @@ Feature: Aggregations
CREATE (s:Subnet {ip: "192.168.0.1"})
"""
When executing query:
"""
MATCH (subnet:Subnet) WHERE FALSE WITH subnet, collect(subnet.ip) as ips RETURN id(subnet) as id
"""
"""
MATCH (subnet:Subnet) WHERE FALSE WITH subnet, collect(subnet.ip) as ips RETURN id(subnet) as id
"""
Then the result should be empty
Scenario: Empty count aggregation:
Scenario: Empty count aggregation
Given an empty graph
And having executed
"""
CREATE (s:Subnet {ip: "192.168.0.1"})
"""
When executing query:
"""
MATCH (subnet:Subnet) WHERE FALSE WITH subnet, count(subnet.ip) as ips RETURN id(subnet) as id
"""
"""
MATCH (subnet:Subnet) WHERE FALSE WITH subnet, count(subnet.ip) as ips RETURN id(subnet) as id
"""
Then the result should be empty
Scenario: Collect nodes properties into a map:
Given an empty graph
And having executed
"""
CREATE (t:Tag {short_code: "TST", description: "SYSTEM_TAG"}), (t2:Tag {short_code: "PRD", description: "SYSTEM_TAG"}),
(t3:Tag {short_code: "STG", description: "SYSTEM_TAG"}), (device {name: "name1"}), (device)-[a1:ASSOCIATED]->(t),
(device)-[a2:ASSOCIATED]->(t2), (device)-[a3:ASSOCIATED]->(t3);
"""
When executing query:
"""
MATCH (d {name: "name1"})-[t:ASSOCIATED]-(tag:Tag) RETURN collect({short_code: tag.short_code, description: tag.description}) as tags;
"""
Then the result should be:
| tags |
| [{description: 'SYSTEM_TAG', short_code: 'TST'}, {description: 'SYSTEM_TAG', short_code: 'PRD'}, {description: 'SYSTEM_TAG', short_code: 'STG'}] |
Scenario: Count directly without WITH clause 01
Given an empty graph
And having executed
"""
CREATE (:Node {prop1: 1, prop2: 2, prop3: 3}), (:Node {prop1: 10, prop2: 11, prop3: 12}), (:Node {prop1: 20, prop2: 21, prop3: 22})
"""
When executing query:
"""
MATCH (n) RETURN n.prop1, n.prop2, n.prop3, count(*) AS cnt
"""
Then the result should be:
| n.prop1 | n.prop2 | n.prop3 | cnt |
| 20 | 21 | 22 | 1 |
| 10 | 11 | 12 | 1 |
| 1 | 2 | 3 | 1 |
Scenario: Count directly without WITH clause 02
Given an empty graph
And having executed
"""
CREATE (:Node {prop1: 1, prop2: 2, prop3: 3}), (:Node {prop1: 10, prop2: 11, prop3: 12}), (:Node {prop1: 20, prop2: 21, prop3: 22})
"""
When executing query:
"""
MATCH (n) WITH n.prop1 AS prop1, n.prop2 AS prop2, n.prop3 AS prop3 RETURN prop1, prop2, prop3, count(*) AS cnt;
"""
Then the result should be:
| prop1 | prop2 | prop3 | cnt |
| 20 | 21 | 22 | 1 |
| 10 | 11 | 12 | 1 |
| 1 | 2 | 3 | 1 |
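Both new scenarios pin down Cypher's implicit grouping: every non-aggregated projection item becomes part of the grouping key, whether count(*) appears directly in RETURN or after a WITH. A minimal Python sketch of that semantics:

# Implicit grouping, sketched: the non-aggregated items (prop1, prop2,
# prop3) form the key and count(*) counts rows per key, matching the
# result tables above.
from collections import Counter

rows = [(1, 2, 3), (10, 11, 12), (20, 21, 22)]
counts = Counter(rows)  # key = (prop1, prop2, prop3)
print([key + (cnt,) for key, cnt in counts.items()])
# [(1, 2, 3, 1), (10, 11, 12, 1), (20, 21, 22, 1)]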

View File

@@ -410,19 +410,67 @@ Feature: Aggregations
CREATE (s:Subnet {ip: "192.168.0.1"})
"""
When executing query:
"""
MATCH (subnet:Subnet) WHERE FALSE WITH subnet, collect(subnet.ip) as ips RETURN id(subnet) as id
"""
"""
MATCH (subnet:Subnet) WHERE FALSE WITH subnet, collect(subnet.ip) as ips RETURN id(subnet) as id
"""
Then the result should be empty
Scenario: Empty count aggregation:
Scenario: Empty count aggregation
Given an empty graph
And having executed
"""
CREATE (s:Subnet {ip: "192.168.0.1"})
"""
When executing query:
"""
MATCH (subnet:Subnet) WHERE FALSE WITH subnet, count(subnet.ip) as ips RETURN id(subnet) as id
"""
"""
MATCH (subnet:Subnet) WHERE FALSE WITH subnet, count(subnet.ip) as ips RETURN id(subnet) as id
"""
Then the result should be empty
Scenario: Collect nodes properties into a map:
Given an empty graph
And having executed
"""
CREATE (t:Tag {short_code: "TST", description: "SYSTEM_TAG"}), (t2:Tag {short_code: "PRD", description: "SYSTEM_TAG"}),
(t3:Tag {short_code: "STG", description: "SYSTEM_TAG"}), (device {name: "name1"}), (device)-[a1:ASSOCIATED]->(t),
(device)-[a2:ASSOCIATED]->(t2), (device)-[a3:ASSOCIATED]->(t3);
"""
When executing query:
"""
MATCH (d {name: "name1"})-[t:ASSOCIATED]-(tag:Tag) RETURN collect({short_code: tag.short_code, description: tag.description}) as tags;
"""
Then the result should be:
| tags |
| [{description: 'SYSTEM_TAG', short_code: 'TST'}, {description: 'SYSTEM_TAG', short_code: 'PRD'}, {description: 'SYSTEM_TAG', short_code: 'STG'}] |
Scenario: Count directly without WITH clause 01
Given an empty graph
And having executed
"""
CREATE (:Node {prop1: 1, prop2: 2, prop3: 3}), (:Node {prop1: 10, prop2: 11, prop3: 12}), (:Node {prop1: 20, prop2: 21, prop3: 22})
"""
When executing query:
"""
MATCH (n) RETURN n.prop1, n.prop2, n.prop3, count(*) AS cnt
"""
Then the result should be:
| n.prop1 | n.prop2 | n.prop3 | cnt |
| 20 | 21 | 22 | 1 |
| 10 | 11 | 12 | 1 |
| 1 | 2 | 3 | 1 |
Scenario: Count directly without WITH clause 02
Given an empty graph
And having executed
"""
CREATE (:Node {prop1: 1, prop2: 2, prop3: 3}), (:Node {prop1: 10, prop2: 11, prop3: 12}), (:Node {prop1: 20, prop2: 21, prop3: 22})
"""
When executing query:
"""
MATCH (n) WITH n.prop1 AS prop1, n.prop2 AS prop2, n.prop3 AS prop3 RETURN prop1, prop2, prop3, count(*) AS cnt;
"""
Then the result should be:
| prop1 | prop2 | prop3 | cnt |
| 20 | 21 | 22 | 1 |
| 10 | 11 | 12 | 1 |
| 1 | 2 | 3 | 1 |