#include "query/plan/operator.hpp"

#include <algorithm>
#include <limits>
#include <queue>
#include <random>
#include <string>
#include <tuple>
#include <type_traits>
#include <unordered_map>
#include <unordered_set>
#include <utility>

#include "boost/archive/binary_iarchive.hpp"
#include "boost/archive/binary_oarchive.hpp"
#include "boost/serialization/export.hpp"
#include "glog/logging.h"

#include "communication/result_stream_faker.hpp"

#include "database/distributed_graph_db.hpp"
#include "database/graph_db_accessor.hpp"
#include "distributed/bfs_rpc_clients.hpp"
#include "distributed/pull_rpc_clients.hpp"
#include "distributed/updates_rpc_clients.hpp"
#include "distributed/updates_rpc_server.hpp"
#include "integrations/kafka/exceptions.hpp"
#include "integrations/kafka/streams.hpp"
#include "query/context.hpp"
#include "query/exceptions.hpp"
#include "query/frontend/ast/ast.hpp"
#include "query/frontend/semantic/symbol_table.hpp"
#include "query/interpret/eval.hpp"
#include "query/path.hpp"
#include "utils/algorithm.hpp"
#include "utils/exceptions.hpp"
#include "utils/hashing/fnv.hpp"
#include "utils/thread/sync.hpp"

DEFINE_HIDDEN_int32(remote_pull_sleep_micros, 10,
                    "Sleep between remote result pulling in microseconds");

// Macro for the default implementation of LogicalOperator::Accept
// that accepts the visitor and visits its input_ operator.
#define ACCEPT_WITH_INPUT(class_name)                                    \
  bool class_name::Accept(HierarchicalLogicalOperatorVisitor &visitor) { \
    if (visitor.PreVisit(*this)) {                                       \
      input_->Accept(visitor);                                           \
    }                                                                    \
    return visitor.PostVisit(*this);                                     \
  }
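
// For example, ACCEPT_WITH_INPUT(CreateExpand) expands to:
//
//   bool CreateExpand::Accept(HierarchicalLogicalOperatorVisitor &visitor) {
//     if (visitor.PreVisit(*this)) {
//       input_->Accept(visitor);
//     }
//     return visitor.PostVisit(*this);
//   }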

#define WITHOUT_SINGLE_INPUT(class_name)                           \
  bool class_name::HasSingleInput() const { return false; }        \
  std::shared_ptr<LogicalOperator> class_name::input() const {     \
    LOG(FATAL) << "Operator " << #class_name                       \
               << " has no single input!";                         \
  }                                                                \
  void class_name::set_input(std::shared_ptr<LogicalOperator>) {   \
    LOG(FATAL) << "Operator " << #class_name                       \
               << " has no single input!";                         \
  }
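
// For example, WITHOUT_SINGLE_INPUT(Once) makes Once::HasSingleInput() return
// false and makes both Once::input() and Once::set_input() abort with a fatal
// log, marking Once as an operator that has no single input.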

namespace query::plan {

namespace {

// Sets a property on a record accessor from a TypedValue. If the TypedValue
// cannot be converted to PropertyValue, a QueryRuntimeException is raised.
template <class TRecordAccessor>
void PropsSetChecked(TRecordAccessor &record, storage::Property key,
                     TypedValue value) {
  try {
    record.PropsSet(key, value);
  } catch (const TypedValueException &) {
    throw QueryRuntimeException("'{}' cannot be used as a property value.",
                                value.type());
  } catch (const RecordDeletedError &) {
    throw QueryRuntimeException(
        "Trying to set properties on a deleted graph element.");
  }
}

// Checks if the given value of the symbol has the expected type. If not,
// raises QueryRuntimeException.
void ExpectType(Symbol symbol, TypedValue value, TypedValue::Type expected) {
  if (value.type() != expected)
    throw QueryRuntimeException("Expected a {} for '{}', but got {}.", expected,
                                symbol.name(), value.type());
}

// Returns the boolean result of evaluating a filter expression. Null is
// treated as false. Other non-boolean values raise a QueryRuntimeException.
bool EvaluateFilter(ExpressionEvaluator &evaluator, Expression *filter) {
  TypedValue result = filter->Accept(evaluator);
  // Null is treated like false.
  if (result.IsNull()) return false;
  if (result.type() != TypedValue::Type::Bool)
    throw QueryRuntimeException(
        "Filter expression must be a bool or null, but got {}.", result.type());
  return result.Value<bool>();
}

}  // namespace

bool Once::OnceCursor::Pull(Frame &, Context &) {
  if (!did_pull_) {
    did_pull_ = true;
    return true;
  }
  return false;
}

std::unique_ptr<Cursor> Once::MakeCursor(database::GraphDbAccessor &) const {
  return std::make_unique<OnceCursor>();
}

WITHOUT_SINGLE_INPUT(Once);

void Once::OnceCursor::Reset() { did_pull_ = false; }

CreateNode::CreateNode(const std::shared_ptr<LogicalOperator> &input,
                       NodeAtom *node_atom, bool on_random_worker)
    : input_(input ? input : std::make_shared<Once>()),
      node_atom_(node_atom),
      on_random_worker_(on_random_worker) {}

namespace {

// Returns a random worker id. Worker ID is obtained from the Db.
int RandomWorkerId(const database::DistributedGraphDb &db) {
  thread_local std::mt19937 gen_{std::random_device{}()};
  thread_local std::uniform_int_distribution<int> rand_;

  auto worker_ids = db.GetWorkerIds();
  return worker_ids[rand_(gen_) % worker_ids.size()];
}

// Creates a vertex on this GraphDb. Returns a reference to the vertex placed
// on the frame.
VertexAccessor &CreateLocalVertex(NodeAtom *node_atom, Frame &frame,
                                  Context &context) {
  auto &dba = context.db_accessor_;
  auto new_node = dba.InsertVertex();
  for (auto label : node_atom->labels_) new_node.add_label(label);

  // Evaluator should use the latest accessors, as modified in this query, when
  // setting properties on new nodes.
  ExpressionEvaluator evaluator(frame, &context, GraphView::NEW);
  for (auto &kv : node_atom->properties_)
    PropsSetChecked(new_node, kv.first.second, kv.second->Accept(evaluator));
  frame[context.symbol_table_.at(*node_atom->identifier_)] = new_node;
  return frame[context.symbol_table_.at(*node_atom->identifier_)].ValueVertex();
}

// Creates a vertex on the GraphDb with the given worker_id. Can be this
// worker.
VertexAccessor &CreateVertexOnWorker(int worker_id, NodeAtom *node_atom,
                                     Frame &frame, Context &context) {
  auto &dba = context.db_accessor_;

  int current_worker_id = 0;
  // TODO: Figure out a better solution.
  if (auto *distributed_db =
          dynamic_cast<database::DistributedGraphDb *>(&dba.db())) {
    current_worker_id = distributed_db->WorkerId();
  } else {
    CHECK(dynamic_cast<database::SingleNode *>(&dba.db()));
  }

  if (worker_id == current_worker_id)
    return CreateLocalVertex(node_atom, frame, context);

  std::unordered_map<storage::Property, query::TypedValue> properties;

  // Evaluator should use the latest accessors, as modified in this query, when
  // setting properties on new nodes.
  ExpressionEvaluator evaluator(frame, &context, GraphView::NEW);
  for (auto &kv : node_atom->properties_) {
    auto value = kv.second->Accept(evaluator);
    if (!value.IsPropertyValue()) {
      throw QueryRuntimeException("'{}' cannot be used as a property value.",
                                  value.type());
    }
    properties.emplace(kv.first.second, std::move(value));
  }

  auto new_node = database::InsertVertexIntoRemote(
      &dba, worker_id, node_atom->labels_, properties);
  frame[context.symbol_table_.at(*node_atom->identifier_)] = new_node;
  return frame[context.symbol_table_.at(*node_atom->identifier_)].ValueVertex();
}

}  // namespace

ACCEPT_WITH_INPUT(CreateNode)

std::unique_ptr<Cursor> CreateNode::MakeCursor(
    database::GraphDbAccessor &db) const {
  return std::make_unique<CreateNodeCursor>(*this, db);
}

std::vector<Symbol> CreateNode::ModifiedSymbols(
    const SymbolTable &table) const {
  auto symbols = input_->ModifiedSymbols(table);
  symbols.emplace_back(table.at(*node_atom_->identifier_));
  return symbols;
}

CreateNode::CreateNodeCursor::CreateNodeCursor(const CreateNode &self,
                                               database::GraphDbAccessor &db)
    : self_(self), db_(db), input_cursor_(self.input_->MakeCursor(db)) {}

bool CreateNode::CreateNodeCursor::Pull(Frame &frame, Context &context) {
  if (input_cursor_->Pull(frame, context)) {
    if (self_.on_random_worker_) {
      // TODO: Replace this with some other mechanism
      auto *distributed_db =
          dynamic_cast<database::DistributedGraphDb *>(&db_.db());
      CHECK(distributed_db);
      CreateVertexOnWorker(RandomWorkerId(*distributed_db), self_.node_atom_,
                           frame, context);
    } else {
      CreateLocalVertex(self_.node_atom_, frame, context);
    }
    return true;
  }
  return false;
}

void CreateNode::CreateNodeCursor::Reset() { input_cursor_->Reset(); }

CreateExpand::CreateExpand(NodeAtom *node_atom, EdgeAtom *edge_atom,
                           const std::shared_ptr<LogicalOperator> &input,
                           Symbol input_symbol, bool existing_node)
    : node_atom_(node_atom),
      edge_atom_(edge_atom),
      input_(input ? input : std::make_shared<Once>()),
      input_symbol_(input_symbol),
      existing_node_(existing_node) {}

ACCEPT_WITH_INPUT(CreateExpand)

std::unique_ptr<Cursor> CreateExpand::MakeCursor(
    database::GraphDbAccessor &db) const {
  return std::make_unique<CreateExpandCursor>(*this, db);
}

std::vector<Symbol> CreateExpand::ModifiedSymbols(
    const SymbolTable &table) const {
  auto symbols = input_->ModifiedSymbols(table);
  symbols.emplace_back(table.at(*node_atom_->identifier_));
  symbols.emplace_back(table.at(*edge_atom_->identifier_));
  return symbols;
}

CreateExpand::CreateExpandCursor::CreateExpandCursor(
    const CreateExpand &self, database::GraphDbAccessor &db)
    : self_(self), db_(db), input_cursor_(self.input_->MakeCursor(db)) {}

bool CreateExpand::CreateExpandCursor::Pull(Frame &frame, Context &context) {
  if (!input_cursor_->Pull(frame, context)) return false;

  // get the origin vertex
  TypedValue &vertex_value = frame[self_.input_symbol_];
  ExpectType(self_.input_symbol_, vertex_value, TypedValue::Type::Vertex);
  auto &v1 = vertex_value.Value<VertexAccessor>();

  // Similarly to CreateNode, newly created edges and nodes should use the
  // latest accessors.
  ExpressionEvaluator evaluator(frame, &context, GraphView::NEW);
  // E.g. we pick up new properties: `CREATE (n {p: 42}) -[:r {ep: n.p}]-> ()`
  v1.SwitchNew();

  // get the destination vertex (possibly an existing node)
  auto &v2 = OtherVertex(v1.GlobalAddress().worker_id(), frame, context);
  v2.SwitchNew();

  // create an edge between the two nodes
  switch (self_.edge_atom_->direction_) {
    case EdgeAtom::Direction::IN:
      CreateEdge(v2, v1, frame, context.symbol_table_, evaluator);
      break;
    case EdgeAtom::Direction::OUT:
      CreateEdge(v1, v2, frame, context.symbol_table_, evaluator);
      break;
    case EdgeAtom::Direction::BOTH:
      // In the case of an undirected CreateExpand we choose an arbitrary
      // direction. This is used in the MERGE clause; it is not allowed in the
      // CREATE clause, and the semantic checker needs to ensure it doesn't
      // reach this point.
      CreateEdge(v1, v2, frame, context.symbol_table_, evaluator);
  }

  return true;
}

void CreateExpand::CreateExpandCursor::Reset() { input_cursor_->Reset(); }

VertexAccessor &CreateExpand::CreateExpandCursor::OtherVertex(
    int worker_id, Frame &frame, Context &context) {
  if (self_.existing_node_) {
    const auto &dest_node_symbol =
        context.symbol_table_.at(*self_.node_atom_->identifier_);
    TypedValue &dest_node_value = frame[dest_node_symbol];
    ExpectType(dest_node_symbol, dest_node_value, TypedValue::Type::Vertex);
    return dest_node_value.Value<VertexAccessor>();
  } else {
    return CreateVertexOnWorker(worker_id, self_.node_atom_, frame, context);
  }
}

void CreateExpand::CreateExpandCursor::CreateEdge(
    VertexAccessor &from, VertexAccessor &to, Frame &frame,
    const SymbolTable &symbol_table, ExpressionEvaluator &evaluator) {
  EdgeAccessor edge =
      db_.InsertEdge(from, to, self_.edge_atom_->edge_types_[0]);
  for (auto kv : self_.edge_atom_->properties_)
    PropsSetChecked(edge, kv.first.second, kv.second->Accept(evaluator));
  frame[symbol_table.at(*self_.edge_atom_->identifier_)] = edge;
}
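
// Cursor that pulls a vertex iterable from 'get_vertices_' for every input
// pull and yields its vertices one by one under 'output_symbol_'.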
template <class TVerticesFun>
class ScanAllCursor : public Cursor {
 public:
  explicit ScanAllCursor(Symbol output_symbol,
                         std::unique_ptr<Cursor> &&input_cursor,
                         TVerticesFun &&get_vertices,
                         database::GraphDbAccessor &db)
      : output_symbol_(output_symbol),
        input_cursor_(std::move(input_cursor)),
        get_vertices_(std::move(get_vertices)),
        db_(db) {}

  bool Pull(Frame &frame, Context &context) override {
    if (db_.should_abort()) throw HintedAbortError();

    while (!vertices_ || vertices_it_.value() == vertices_.value().end()) {
      if (!input_cursor_->Pull(frame, context)) return false;
      // We need a getter function, because in case of exhausting a lazy
      // iterable, we cannot simply reset it by calling begin().
      auto next_vertices = get_vertices_(frame, context);
      if (!next_vertices) continue;
      // Since the vertices iterator isn't nothrow_move_assignable, we have to
      // use the roundabout assignment + emplace, instead of simply:
      // vertices_ = get_vertices_(frame, context);
      vertices_.emplace(std::move(next_vertices.value()));
      vertices_it_.emplace(vertices_.value().begin());
    }

    frame[output_symbol_] = *vertices_it_.value()++;
    return true;
  }

  void Reset() override {
    input_cursor_->Reset();
    vertices_ = std::experimental::nullopt;
    vertices_it_ = std::experimental::nullopt;
  }

 private:
  const Symbol output_symbol_;
  const std::unique_ptr<Cursor> input_cursor_;
  TVerticesFun get_vertices_;
  std::experimental::optional<typename std::result_of<TVerticesFun(
      Frame &, Context &)>::type::value_type>
      vertices_;
  std::experimental::optional<decltype(vertices_.value().begin())> vertices_it_;
  database::GraphDbAccessor &db_;
};

ScanAll::ScanAll(const std::shared_ptr<LogicalOperator> &input,
                 Symbol output_symbol, GraphView graph_view)
    : input_(input ? input : std::make_shared<Once>()),
      output_symbol_(output_symbol),
      graph_view_(graph_view) {}

ACCEPT_WITH_INPUT(ScanAll)

std::unique_ptr<Cursor> ScanAll::MakeCursor(
    database::GraphDbAccessor &db) const {
  auto vertices = [this, &db](Frame &, Context &) {
    return std::experimental::make_optional(
        db.Vertices(graph_view_ == GraphView::NEW));
  };
  return std::make_unique<ScanAllCursor<decltype(vertices)>>(
      output_symbol_, input_->MakeCursor(db), std::move(vertices), db);
}

std::vector<Symbol> ScanAll::ModifiedSymbols(const SymbolTable &table) const {
  auto symbols = input_->ModifiedSymbols(table);
  symbols.emplace_back(output_symbol_);
  return symbols;
}

ScanAllByLabel::ScanAllByLabel(const std::shared_ptr<LogicalOperator> &input,
                               Symbol output_symbol, storage::Label label,
                               GraphView graph_view)
    : ScanAll(input, output_symbol, graph_view), label_(label) {}

ACCEPT_WITH_INPUT(ScanAllByLabel)

std::unique_ptr<Cursor> ScanAllByLabel::MakeCursor(
    database::GraphDbAccessor &db) const {
  auto vertices = [this, &db](Frame &, Context &) {
    return std::experimental::make_optional(
        db.Vertices(label_, graph_view_ == GraphView::NEW));
  };
  return std::make_unique<ScanAllCursor<decltype(vertices)>>(
      output_symbol_, input_->MakeCursor(db), std::move(vertices), db);
}

ScanAllByLabelPropertyRange::ScanAllByLabelPropertyRange(
    const std::shared_ptr<LogicalOperator> &input, Symbol output_symbol,
    storage::Label label, storage::Property property,
    std::experimental::optional<Bound> lower_bound,
    std::experimental::optional<Bound> upper_bound, GraphView graph_view)
    : ScanAll(input, output_symbol, graph_view),
      label_(label),
      property_(property),
      lower_bound_(lower_bound),
      upper_bound_(upper_bound) {
  DCHECK(lower_bound_ || upper_bound_) << "Only one bound can be left out";
}

ACCEPT_WITH_INPUT(ScanAllByLabelPropertyRange)

std::unique_ptr<Cursor> ScanAllByLabelPropertyRange::MakeCursor(
    database::GraphDbAccessor &db) const {
  auto vertices = [this, &db](Frame &frame, Context &context)
      -> std::experimental::optional<decltype(
          db.Vertices(label_, property_, std::experimental::nullopt,
                      std::experimental::nullopt, false))> {
    ExpressionEvaluator evaluator(frame, &context, graph_view_);
    auto convert = [&evaluator](const auto &bound)
        -> std::experimental::optional<utils::Bound<PropertyValue>> {
      if (!bound) return std::experimental::nullopt;
      auto value = bound->value()->Accept(evaluator);
      try {
        return std::experimental::make_optional(
            utils::Bound<PropertyValue>(value, bound->type()));
      } catch (const TypedValueException &) {
        throw QueryRuntimeException("'{}' cannot be used as a property value.",
                                    value.type());
      }
    };
    auto maybe_lower = convert(lower_bound());
    auto maybe_upper = convert(upper_bound());
    // If any bound is null, then the comparison would result in nulls. This
    // is treated as not satisfying the filter, so return no vertices.
    if (maybe_lower && maybe_lower->value().IsNull())
      return std::experimental::nullopt;
    if (maybe_upper && maybe_upper->value().IsNull())
      return std::experimental::nullopt;
    return std::experimental::make_optional(
        db.Vertices(label_, property_, maybe_lower, maybe_upper,
                    graph_view_ == GraphView::NEW));
  };
  return std::make_unique<ScanAllCursor<decltype(vertices)>>(
      output_symbol_, input_->MakeCursor(db), std::move(vertices), db);
}

ScanAllByLabelPropertyValue::ScanAllByLabelPropertyValue(
    const std::shared_ptr<LogicalOperator> &input, Symbol output_symbol,
    storage::Label label, storage::Property property, Expression *expression,
    GraphView graph_view)
    : ScanAll(input, output_symbol, graph_view),
      label_(label),
      property_(property),
      expression_(expression) {
  DCHECK(expression) << "Expression is not optional.";
}

ACCEPT_WITH_INPUT(ScanAllByLabelPropertyValue)

std::unique_ptr<Cursor> ScanAllByLabelPropertyValue::MakeCursor(
    database::GraphDbAccessor &db) const {
  auto vertices = [this, &db](Frame &frame, Context &context)
      -> std::experimental::optional<decltype(
          db.Vertices(label_, property_, TypedValue::Null, false))> {
    ExpressionEvaluator evaluator(frame, &context, graph_view_);
    auto value = expression_->Accept(evaluator);
    if (value.IsNull()) return std::experimental::nullopt;
    try {
      return std::experimental::make_optional(
          db.Vertices(label_, property_, value, graph_view_ == GraphView::NEW));
    } catch (const TypedValueException &) {
      throw QueryRuntimeException("'{}' cannot be used as a property value.",
                                  value.type());
    }
  };
  return std::make_unique<ScanAllCursor<decltype(vertices)>>(
      output_symbol_, input_->MakeCursor(db), std::move(vertices), db);
}

ExpandCommon::ExpandCommon(Symbol node_symbol, Symbol edge_symbol,
                           EdgeAtom::Direction direction,
                           const std::vector<storage::EdgeType> &edge_types,
                           const std::shared_ptr<LogicalOperator> &input,
                           Symbol input_symbol, bool existing_node,
                           GraphView graph_view)
    : node_symbol_(node_symbol),
      edge_symbol_(edge_symbol),
      direction_(direction),
      edge_types_(edge_types),
      input_(input ? input : std::make_shared<Once>()),
      input_symbol_(input_symbol),
      existing_node_(existing_node),
      graph_view_(graph_view) {}
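
// When 'existing_node_' is set, checks whether the vertex already bound to
// 'node_symbol_' on the frame matches 'new_node' (a Null value, as produced
// by a failed optional match, never matches). Otherwise binds 'new_node' to
// 'node_symbol_'. Returns whether the expansion should be yielded.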
bool ExpandCommon::HandleExistingNode(const VertexAccessor &new_node,
                                      Frame &frame) const {
  if (existing_node_) {
    TypedValue &old_node_value = frame[node_symbol_];
    // old_node_value may be Null when using optional matching
    if (old_node_value.IsNull()) return false;
    ExpectType(node_symbol_, old_node_value, TypedValue::Type::Vertex);
    return old_node_value.Value<VertexAccessor>() == new_node;
  } else {
    frame[node_symbol_] = new_node;
    return true;
  }
}

ACCEPT_WITH_INPUT(Expand)

std::unique_ptr<Cursor> Expand::MakeCursor(
    database::GraphDbAccessor &db) const {
  return std::make_unique<ExpandCursor>(*this, db);
}

std::vector<Symbol> Expand::ModifiedSymbols(const SymbolTable &table) const {
  auto symbols = input_->ModifiedSymbols(table);
  symbols.emplace_back(node_symbol());
  symbols.emplace_back(edge_symbol());
  return symbols;
}

Expand::ExpandCursor::ExpandCursor(const Expand &self,
                                   database::GraphDbAccessor &db)
    : self_(self), input_cursor_(self.input_->MakeCursor(db)), db_(db) {}
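
// Expands the vertex pulled from the input along 'self_.direction_'. Edges
// that are stored locally are placed on the frame and yielded right away,
// while edges living on another worker are resolved asynchronously as
// FutureExpand entries and yielded once their destination vertex is ready.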
bool Expand::ExpandCursor::Pull(Frame &frame, Context &context) {
  // A helper function for expanding a node from an edge.
  auto pull_node = [this, &frame](const EdgeAccessor &new_edge,
                                  EdgeAtom::Direction direction) {
    if (self_.existing_node_) return;
    switch (direction) {
      case EdgeAtom::Direction::IN:
        frame[self_.node_symbol_] = new_edge.from();
        break;
      case EdgeAtom::Direction::OUT:
        frame[self_.node_symbol_] = new_edge.to();
        break;
      case EdgeAtom::Direction::BOTH:
        LOG(FATAL) << "Must indicate exact expansion direction here";
    }
  };

  auto push_future_edge = [this, &frame](auto edge, auto direction) {
    auto edge_to = std::async(std::launch::async, [edge, direction]() {
      if (direction == EdgeAtom::Direction::IN)
        return std::make_pair(edge, edge.from());
      if (direction == EdgeAtom::Direction::OUT)
        return std::make_pair(edge, edge.to());
      LOG(FATAL) << "Must indicate exact expansion direction here";
    });
    future_expands_.emplace_back(
        FutureExpand{utils::make_future(std::move(edge_to)), frame.elems()});
  };

  auto find_ready_future = [this]() {
    return std::find_if(
        future_expands_.begin(), future_expands_.end(),
        [](const auto &future) { return future.edge_to.IsReady(); });
  };

  auto put_future_edge_on_frame = [this, &frame](auto &future) {
    auto edge_to = future.edge_to.get();
    frame.elems() = future.frame_elems;
    frame[self_.edge_symbol_] = edge_to.first;
    frame[self_.node_symbol_] = edge_to.second;
  };

  while (true) {
    if (db_.should_abort()) throw HintedAbortError();
    // Try to get any remote edges we may have available first. If we yielded
    // all of the local edges first, we may accumulate large amounts of future
    // edges.
    {
      auto future_it = find_ready_future();
      if (future_it != future_expands_.end()) {
        // Back up the current frame (if we haven't done so already) before
        // putting the future edge.
        if (last_frame_.empty()) last_frame_ = frame.elems();
        put_future_edge_on_frame(*future_it);
        // Erase the future and return true to yield the result.
        future_expands_.erase(future_it);
        return true;
      }
    }
    // In case we have replaced the frame with the one for a future edge,
    // restore it.
    if (!last_frame_.empty()) {
      frame.elems() = last_frame_;
      last_frame_.clear();
    }
    // attempt to get a value from the incoming edges
    if (in_edges_ && *in_edges_it_ != in_edges_->end()) {
      auto edge = *(*in_edges_it_)++;
      if (edge.address().is_local() || self_.existing_node_) {
        frame[self_.edge_symbol_] = edge;
        pull_node(edge, EdgeAtom::Direction::IN);
        return true;
      } else {
        push_future_edge(edge, EdgeAtom::Direction::IN);
        continue;
      }
    }

    // attempt to get a value from the outgoing edges
    if (out_edges_ && *out_edges_it_ != out_edges_->end()) {
      auto edge = *(*out_edges_it_)++;
      // when expanding in EdgeAtom::Direction::BOTH directions
      // we should do only one expansion for cycles, and it was
      // already done in the block above
      if (self_.direction_ == EdgeAtom::Direction::BOTH && edge.is_cycle())
        continue;
      if (edge.address().is_local() || self_.existing_node_) {
        frame[self_.edge_symbol_] = edge;
        pull_node(edge, EdgeAtom::Direction::OUT);
        return true;
      } else {
        push_future_edge(edge, EdgeAtom::Direction::OUT);
        continue;
      }
    }

    // If we are here, either the edges have not been initialized or they have
    // been exhausted, so attempt to initialize the edges from the next input.
    if (!InitEdges(frame, context)) {
      // We are done with local and remote edges so return false.
      if (future_expands_.empty()) return false;
      // We still need to yield remote edges.
      auto future_it = find_ready_future();
      if (future_it != future_expands_.end()) {
        put_future_edge_on_frame(*future_it);
        // Erase the future and return true to yield the result.
        future_expands_.erase(future_it);
        return true;
      }
      // We are still waiting for future edges, so sleep and fall through to
      // continue the loop.
      std::this_thread::sleep_for(
          std::chrono::microseconds(FLAGS_remote_pull_sleep_micros));
    }

    // we have re-initialized the edges, continue with the loop
  }
}

void Expand::ExpandCursor::Reset() {
  input_cursor_->Reset();
  in_edges_ = std::experimental::nullopt;
  in_edges_it_ = std::experimental::nullopt;
  out_edges_ = std::experimental::nullopt;
  out_edges_it_ = std::experimental::nullopt;
  future_expands_.clear();
  last_frame_.clear();
}
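
// Pulls the next vertex from the input and (re)initializes the in_edges_ and
// out_edges_ iterators over its edges, honoring 'self_.direction_' and the
// existing_node_ optimization. Returns false when the input is exhausted.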
bool Expand::ExpandCursor::InitEdges(Frame &frame, Context &context) {
  // Input Vertex could be null if it is created by a failed optional match. In
  // those cases we skip that input pull and continue with the next.
  while (true) {
    if (!input_cursor_->Pull(frame, context)) return false;
    TypedValue &vertex_value = frame[self_.input_symbol_];

    // Null check due to possible failed optional match.
    if (vertex_value.IsNull()) continue;

    ExpectType(self_.input_symbol_, vertex_value, TypedValue::Type::Vertex);
    auto &vertex = vertex_value.Value<VertexAccessor>();
    SwitchAccessor(vertex, self_.graph_view_);

    auto direction = self_.direction_;
    if (direction == EdgeAtom::Direction::IN ||
        direction == EdgeAtom::Direction::BOTH) {
      if (self_.existing_node_) {
        TypedValue &existing_node = frame[self_.node_symbol_];
        // existing_node may be Null when using optional matching
        if (!existing_node.IsNull()) {
          ExpectType(self_.node_symbol_, existing_node,
                     TypedValue::Type::Vertex);
          in_edges_.emplace(
              vertex.in(existing_node.ValueVertex(), &self_.edge_types()));
        }
      } else {
        in_edges_.emplace(vertex.in(&self_.edge_types()));
      }
      in_edges_it_.emplace(in_edges_->begin());
    }

    if (direction == EdgeAtom::Direction::OUT ||
        direction == EdgeAtom::Direction::BOTH) {
      if (self_.existing_node_) {
        TypedValue &existing_node = frame[self_.node_symbol_];
        // existing_node may be Null when using optional matching
        if (!existing_node.IsNull()) {
          ExpectType(self_.node_symbol_, existing_node,
                     TypedValue::Type::Vertex);
          out_edges_.emplace(
              vertex.out(existing_node.ValueVertex(), &self_.edge_types()));
        }
      } else {
        out_edges_.emplace(vertex.out(&self_.edge_types()));
      }
      out_edges_it_.emplace(out_edges_->begin());
    }

    return true;
  }
}

ExpandVariable::ExpandVariable(
    Symbol node_symbol, Symbol edge_symbol, EdgeAtom::Type type,
    EdgeAtom::Direction direction,
    const std::vector<storage::EdgeType> &edge_types, bool is_reverse,
    Expression *lower_bound, Expression *upper_bound,
    const std::shared_ptr<LogicalOperator> &input, Symbol input_symbol,
    bool existing_node, Lambda filter_lambda,
    std::experimental::optional<Lambda> weight_lambda,
    std::experimental::optional<Symbol> total_weight, GraphView graph_view)
    : ExpandCommon(node_symbol, edge_symbol, direction, edge_types, input,
                   input_symbol, existing_node, graph_view),
      type_(type),
      is_reverse_(is_reverse),
      lower_bound_(lower_bound),
      upper_bound_(upper_bound),
      filter_lambda_(filter_lambda),
      weight_lambda_(weight_lambda),
      total_weight_(total_weight) {
  DCHECK(type_ == EdgeAtom::Type::DEPTH_FIRST ||
         type_ == EdgeAtom::Type::BREADTH_FIRST ||
         type_ == EdgeAtom::Type::WEIGHTED_SHORTEST_PATH)
      << "ExpandVariable can only be used with breadth first, depth first or "
         "weighted shortest path type";
  DCHECK(!(type_ == EdgeAtom::Type::BREADTH_FIRST && is_reverse))
      << "Breadth first expansion can't be reversed";
}

ACCEPT_WITH_INPUT(ExpandVariable)

std::vector<Symbol> ExpandVariable::ModifiedSymbols(
    const SymbolTable &table) const {
  auto symbols = input_->ModifiedSymbols(table);
  symbols.emplace_back(node_symbol());
  symbols.emplace_back(edge_symbol());
  return symbols;
}

namespace {

/**
 * Helper function that returns an iterable over
 * <EdgeAccessor, EdgeAtom::Direction> pairs
 * for the given params.
 *
 * @param vertex - The vertex to expand from.
 * @param direction - Expansion direction. All directions (IN, OUT, BOTH)
 *    are supported.
 * @return See above.
 */
auto ExpandFromVertex(const VertexAccessor &vertex,
                      EdgeAtom::Direction direction,
                      const std::vector<storage::EdgeType> &edge_types) {
  // wraps an EdgeAccessor into a pair <accessor, direction>
  auto wrapper = [](EdgeAtom::Direction direction, auto &&vertices) {
    return iter::imap(
        [direction](const EdgeAccessor &edge) {
          return std::make_pair(edge, direction);
        },
        std::move(vertices));
  };

  // prepare a vector of elements we'll pass to the itertools
  std::vector<decltype(wrapper(direction, vertex.in()))> chain_elements;

  if (direction != EdgeAtom::Direction::OUT && vertex.in_degree() > 0) {
    auto edges = vertex.in(&edge_types);
    if (edges.begin() != edges.end()) {
      chain_elements.emplace_back(
          wrapper(EdgeAtom::Direction::IN, std::move(edges)));
    }
  }
  if (direction != EdgeAtom::Direction::IN && vertex.out_degree() > 0) {
    auto edges = vertex.out(&edge_types);
    if (edges.begin() != edges.end()) {
      chain_elements.emplace_back(
          wrapper(EdgeAtom::Direction::OUT, std::move(edges)));
    }
  }

  return iter::chain.from_iterable(std::move(chain_elements));
}

/** A helper function for evaluating an expression that's expected to be
 * an int.
 *
 * @param evaluator
 * @param expr
 * @param what - Name of what's getting evaluated. Used for user
 *    feedback (via exception) when the evaluated value is not an int.
 */
int64_t EvaluateInt(ExpressionEvaluator &evaluator, Expression *expr,
                    const std::string &what) {
  TypedValue value = expr->Accept(evaluator);
  try {
    return value.Value<int64_t>();
  } catch (TypedValueException &e) {
    throw QueryRuntimeException(what + " must be an int");
  }
}
}  // namespace
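
// Cursor that performs a depth-first variable-length expansion. It keeps a
// stack of edge iterables ('edges_') and matching iterators ('edges_it_'),
// one entry per expansion depth, and places the edge list built so far on the
// frame under 'self_.edge_symbol_'.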
class ExpandVariableCursor : public Cursor {
 public:
  ExpandVariableCursor(const ExpandVariable &self,
                       database::GraphDbAccessor &db)
      : self_(self), input_cursor_(self.input_->MakeCursor(db)) {}

  bool Pull(Frame &frame, Context &context) override {
    ExpressionEvaluator evaluator(frame, &context, self_.graph_view_);
    while (true) {
      if (Expand(frame, context)) return true;

      if (PullInput(frame, context)) {
        // if the lower bound is zero we also yield empty paths
        if (lower_bound_ == 0) {
          auto &start_vertex =
              frame[self_.input_symbol_].Value<VertexAccessor>();
          if (self_.HandleExistingNode(start_vertex, frame)) {
            return true;
          }
        }
        // if the lower bound is not zero, we just continue, the next
        // loop iteration will attempt to expand and we're good
      } else
        return false;
      // else continue with the loop, try to expand again
      // because we successfully pulled from the input
    }
  }

  void Reset() override {
    input_cursor_->Reset();
    edges_.clear();
    edges_it_.clear();
  }

 private:
  const ExpandVariable &self_;
  const std::unique_ptr<Cursor> input_cursor_;
  // Bounds. In the cursor they are not optional but set to default values if
  // missing in the ExpandVariable operator. Initialized to arbitrary values,
  // they should only be used after a successful pull from the input.
  int64_t upper_bound_{-1};
  int64_t lower_bound_{-1};

  // a stack of edge iterables corresponding to the level/depth of
  // the expansion currently being Pulled
  std::vector<decltype(ExpandFromVertex(std::declval<VertexAccessor>(),
                                        EdgeAtom::Direction::IN,
                                        self_.edge_types_))>
      edges_;

  // an iterator indicating the position in the corresponding edges_ element
  std::vector<decltype(edges_.begin()->begin())> edges_it_;

  /**
   * Helper function that Pulls from the input vertex and
   * makes iteration over its edges possible.
   *
   * @return If the Pull succeeded. If not, this ExpandVariableCursor
   *    is exhausted.
   */
  bool PullInput(Frame &frame, Context &context) {
    // Input Vertex could be null if it is created by a failed optional match.
    // In those cases we skip that input pull and continue with the next.
    while (true) {
      if (!input_cursor_->Pull(frame, context)) return false;
      TypedValue &vertex_value = frame[self_.input_symbol_];

      // Null check due to possible failed optional match.
      if (vertex_value.IsNull()) continue;

      ExpectType(self_.input_symbol_, vertex_value, TypedValue::Type::Vertex);
      auto &vertex = vertex_value.Value<VertexAccessor>();
      SwitchAccessor(vertex, self_.graph_view_);

      // Evaluate the upper and lower bounds.
      ExpressionEvaluator evaluator(frame, &context, self_.graph_view_);
      auto calc_bound = [&evaluator](auto &bound) {
        auto value = EvaluateInt(evaluator, bound, "Variable expansion bound");
        if (value < 0)
          throw QueryRuntimeException(
              "Variable expansion bound must be positive or zero");
        return value;
      };

      lower_bound_ = self_.lower_bound_ ? calc_bound(self_.lower_bound_) : 1;
      upper_bound_ = self_.upper_bound_ ? calc_bound(self_.upper_bound_)
                                        : std::numeric_limits<int64_t>::max();

      if (upper_bound_ > 0) {
        SwitchAccessor(vertex, self_.graph_view_);
        edges_.emplace_back(
            ExpandFromVertex(vertex, self_.direction_, self_.edge_types_));
        edges_it_.emplace_back(edges_.back().begin());
      }

      // reset the frame value to an empty edge list
      frame[self_.edge_symbol_] = std::vector<TypedValue>();

      return true;
    }
  }

  // Helper function for appending an edge to the list on the frame.
  void AppendEdge(const EdgeAccessor &new_edge,
                  std::vector<TypedValue> &edges_on_frame) {
    // We are placing an edge on the frame. It is possible that there already
    // exists an edge on the frame for this level. If so first remove it.
    DCHECK(edges_.size() > 0) << "Edges are empty";
    if (self_.is_reverse_) {
      // TODO: This is inefficient, we should look into replacing
      // vector with something else for TypedValue::List.
      size_t diff = edges_on_frame.size() -
                    std::min(edges_on_frame.size(), edges_.size() - 1U);
      if (diff > 0U)
        edges_on_frame.erase(edges_on_frame.begin(),
                             edges_on_frame.begin() + diff);
      edges_on_frame.insert(edges_on_frame.begin(), new_edge);
    } else {
      edges_on_frame.resize(
          std::min(edges_on_frame.size(), edges_.size() - 1U));
      edges_on_frame.emplace_back(new_edge);
    }
  }

  /**
   * Performs a single expansion for the current state of this
   * ExpandVariableCursor.
   *
   * @return True if the expansion was a success and this Cursor's
   *    consumer can consume it. False if the expansion failed. In that
   *    case no more expansions are available from the current input
   *    vertex and another Pull from the input cursor should be performed.
   */
  bool Expand(Frame &frame, Context &context) {
    ExpressionEvaluator evaluator(frame, &context, self_.graph_view_);
    // Some expansions might not be valid due to edge uniqueness and
    // existing_node criteria, so expand in a loop until either the input
    // vertex is exhausted or a valid variable-length expansion is available.
    while (true) {
      // pop from the stack while there is stuff to pop and the current
      // level is exhausted
      while (!edges_.empty() && edges_it_.back() == edges_.back().end()) {
        edges_.pop_back();
        edges_it_.pop_back();
      }

      // check if we exhausted everything, if so return false
      if (edges_.empty()) return false;

      // we use this a lot
      std::vector<TypedValue> &edges_on_frame =
          frame[self_.edge_symbol_].Value<std::vector<TypedValue>>();

      // it is possible that edges_on_frame does not contain as many
      // elements as edges_ due to edge-uniqueness (when a whole layer
      // gets exhausted but no edges are valid). for that reason only
      // pop from edges_on_frame if they contain enough elements
      if (self_.is_reverse_) {
        auto diff = edges_on_frame.size() -
                    std::min(edges_on_frame.size(), edges_.size());
        if (diff > 0) {
          edges_on_frame.erase(edges_on_frame.begin(),
                               edges_on_frame.begin() + diff);
        }
      } else {
        edges_on_frame.resize(std::min(edges_on_frame.size(), edges_.size()));
      }

      // if we are here, we have a valid stack,
      // get the edge, increase the relevant iterator
      std::pair<EdgeAccessor, EdgeAtom::Direction> current_edge =
          *edges_it_.back()++;

      // Check edge-uniqueness.
      bool found_existing =
          std::any_of(edges_on_frame.begin(), edges_on_frame.end(),
                      [&current_edge](const TypedValue &edge) {
                        return current_edge.first == edge.Value<EdgeAccessor>();
                      });
      if (found_existing) continue;

      AppendEdge(current_edge.first, edges_on_frame);
      VertexAccessor current_vertex =
          current_edge.second == EdgeAtom::Direction::IN
              ? current_edge.first.from()
              : current_edge.first.to();

      if (!self_.HandleExistingNode(current_vertex, frame)) continue;

      // Skip expanding out of filtered expansion.
      frame[self_.filter_lambda_.inner_edge_symbol] = current_edge.first;
      frame[self_.filter_lambda_.inner_node_symbol] = current_vertex;
      if (self_.filter_lambda_.expression &&
          !EvaluateFilter(evaluator, self_.filter_lambda_.expression))
        continue;

      // we are doing depth-first search, so place the current
      // edge's expansions onto the stack, if we should continue to expand
      if (upper_bound_ > static_cast<int64_t>(edges_.size())) {
        SwitchAccessor(current_vertex, self_.graph_view_);
        edges_.emplace_back(ExpandFromVertex(current_vertex, self_.direction_,
                                             self_.edge_types_));
        edges_it_.emplace_back(edges_.back().begin());
      }

      // We only yield true if we satisfy the lower bound.
      if (static_cast<int64_t>(edges_on_frame.size()) >= lower_bound_)
        return true;
      else
        continue;
    }
  }
};
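
// Cursor that performs a breadth-first variable-length expansion. Expansion
// proceeds level by level using the 'to_visit_current_' and 'to_visit_next_'
// frontiers, while 'processed_' remembers, for every visited vertex, the edge
// used to reach it.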
class ExpandBfsCursor : public query::plan::Cursor {
 public:
  ExpandBfsCursor(const ExpandVariable &self, database::GraphDbAccessor &db)
      : self_(self), input_cursor_(self_.input_->MakeCursor(db)) {}

  bool Pull(Frame &frame, Context &context) override {
    // evaluator for the filtering condition
    ExpressionEvaluator evaluator(frame, &context, self_.graph_view_);

    // for the given (edge, vertex) pair checks if they satisfy the
    // "where" condition. if so, places them in the to_visit_ structure.
    auto expand_pair = [this, &evaluator, &frame](EdgeAccessor edge,
                                                  VertexAccessor vertex) {
      // if we already processed the given vertex it doesn't get expanded
      if (processed_.find(vertex) != processed_.end()) return;

      SwitchAccessor(edge, self_.graph_view_);
      SwitchAccessor(vertex, self_.graph_view_);

      frame[self_.filter_lambda_.inner_edge_symbol] = edge;
      frame[self_.filter_lambda_.inner_node_symbol] = vertex;

      if (self_.filter_lambda_.expression) {
        TypedValue result = self_.filter_lambda_.expression->Accept(evaluator);
        switch (result.type()) {
          case TypedValue::Type::Null:
            return;
          case TypedValue::Type::Bool:
            if (!result.Value<bool>()) return;
            break;
          default:
            throw QueryRuntimeException(
                "Expansion condition must be boolean or null");
        }
      }
      to_visit_next_.emplace_back(edge, vertex);
      processed_.emplace(vertex, edge);
    };

    // populates the to_visit_next_ structure with expansions
    // from the given vertex. skips expansions that don't satisfy
    // the "where" condition.
    auto expand_from_vertex = [this, &expand_pair](VertexAccessor &vertex) {
      if (self_.direction_ != EdgeAtom::Direction::IN) {
        for (const EdgeAccessor &edge : vertex.out(&self_.edge_types_))
          expand_pair(edge, edge.to());
      }
      if (self_.direction_ != EdgeAtom::Direction::OUT) {
        for (const EdgeAccessor &edge : vertex.in(&self_.edge_types_))
          expand_pair(edge, edge.from());
      }
    };

    // do it all in a loop because we skip some elements
    while (true) {
      // if we have nothing to visit on the current depth, switch to next
      if (to_visit_current_.empty()) to_visit_current_.swap(to_visit_next_);

      // if current is still empty, it means both are empty, so pull from
      // input
      if (skip_rest_ || to_visit_current_.empty()) {
        if (!input_cursor_->Pull(frame, context)) return false;
        to_visit_current_.clear();
        to_visit_next_.clear();
        processed_.clear();

        auto vertex_value = frame[self_.input_symbol_];
        // it is possible that the vertex is Null due to optional matching
        if (vertex_value.IsNull()) continue;
        auto vertex = vertex_value.Value<VertexAccessor>();
        SwitchAccessor(vertex, self_.graph_view_);
        processed_.emplace(vertex, std::experimental::nullopt);
        expand_from_vertex(vertex);
        lower_bound_ = self_.lower_bound_
                           ? EvaluateInt(evaluator, self_.lower_bound_,
                                         "Min depth in breadth-first expansion")
                           : 1;
        upper_bound_ = self_.upper_bound_
                           ? EvaluateInt(evaluator, self_.upper_bound_,
                                         "Max depth in breadth-first expansion")
                           : std::numeric_limits<int>::max();
        skip_rest_ = false;
|
2017-10-05 17:25:52 +08:00
|
|
|
if (upper_bound_ < 1)
|
|
|
|
throw QueryRuntimeException(
|
|
|
|
"Max depth in breadth-first expansion must be greater then "
|
|
|
|
"zero");
|
2017-07-30 07:15:43 +08:00
|
|
|
|
2017-10-05 17:25:52 +08:00
|
|
|
// go back to loop start and see if we expanded anything
|
|
|
|
continue;
|
|
|
|
}
|
2017-07-30 07:15:43 +08:00
|
|
|
|
2017-10-05 17:25:52 +08:00
|
|
|
// take the next expansion from the queue
|
|
|
|
std::pair<EdgeAccessor, VertexAccessor> expansion =
|
|
|
|
to_visit_current_.front();
|
|
|
|
to_visit_current_.pop_front();
|
|
|
|
|
|
|
|
// create the frame value for the edges
|
|
|
|
std::vector<TypedValue> edge_list{expansion.first};
|
|
|
|
auto last_vertex = expansion.second;
|
|
|
|
while (true) {
|
|
|
|
const EdgeAccessor &last_edge = edge_list.back().Value<EdgeAccessor>();
|
|
|
|
last_vertex =
|
|
|
|
last_edge.from() == last_vertex ? last_edge.to() : last_edge.from();
|
|
|
|
// origin_vertex must be in processed
|
|
|
|
const auto &previous_edge = processed_.find(last_vertex)->second;
|
|
|
|
if (!previous_edge) break;
|
|
|
|
|
|
|
|
edge_list.push_back(previous_edge.value());
|
|
|
|
}
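      // Illustrative walk-through (added commentary): suppose the source was
      // v0 and BFS discovered v0 -e1-> v1 -e2-> v2, so `processed_` holds
      // {v0: nullopt, v1: e1, v2: e2}. When `expansion` is (e2, v2), the loop
      // above starts with edge_list = [e2], follows e2 back to v1, appends its
      // back-pointer e1, then follows e1 back to v0 whose back-pointer is
      // nullopt and stops, leaving edge_list = [e2, e1] (the path in reverse
      // order, fixed by the std::reverse below).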

      // expand only if what we've just expanded is less than max depth
      if (static_cast<int>(edge_list.size()) < upper_bound_)
        expand_from_vertex(expansion.second);

      if (static_cast<int64_t>(edge_list.size()) < lower_bound_) continue;

      // place destination node on the frame, handle existence flag
      if (self_.existing_node_) {
        TypedValue &node = frame[self_.node_symbol_];
        // due to optional matching the existing node could be null
        if (node.IsNull() || (node != expansion.second).Value<bool>()) continue;
        // there is no point in traversing the rest of the graph because bfs
        // can find only one path to a certain node
        skip_rest_ = true;
      } else
        frame[self_.node_symbol_] = expansion.second;

      // place edges on the frame in the correct order
      std::reverse(edge_list.begin(), edge_list.end());
      frame[self_.edge_symbol_] = std::move(edge_list);

      return true;
    }
  }

  void Reset() override {
    input_cursor_->Reset();
    processed_.clear();
    to_visit_next_.clear();
    to_visit_current_.clear();
  }

 private:
  const ExpandVariable &self_;
  const std::unique_ptr<query::plan::Cursor> input_cursor_;

  // Depth bounds. Calculated on each pull from the input, the initial value is
  // irrelevant.
  int lower_bound_{-1};
  int upper_bound_{-1};

  // when set to true, expansion is restarted from a new source
  bool skip_rest_{false};

  // maps vertices to the edge they got expanded from. it is an optional
  // edge because the root does not get expanded from anything.
  // contains visited vertices as well as those scheduled to be visited.
  std::unordered_map<VertexAccessor, std::experimental::optional<EdgeAccessor>>
      processed_;
  // edge/vertex pairs we have yet to visit, for current and next depth
  std::deque<std::pair<EdgeAccessor, VertexAccessor>> to_visit_current_;
  std::deque<std::pair<EdgeAccessor, VertexAccessor>> to_visit_next_;
};
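
// Illustrative usage (added commentary; the exact query syntax shown is an
// assumption, not taken from this file): this cursor serves single-node
// breadth-first expansions along the lines of
//   MATCH (a {id: 0})-[edges *bfs ..10 (e, n | n.reachable)]->(b) RETURN b;
// where `edges` is the edge list placed on the frame, `..10` is the upper
// depth bound evaluated via EvaluateInt above, and the optional `(e, n | ...)`
// lambda corresponds to filter_lambda_.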

class DistributedExpandBfsCursor : public query::plan::Cursor {
 public:
  DistributedExpandBfsCursor(const ExpandVariable &self,
                             database::GraphDbAccessor &db)
      : self_(self), db_(db), input_cursor_(self_.input_->MakeCursor(db)) {
    // TODO: Pass in a DistributedGraphDb.
    if (auto *distributed_db =
            dynamic_cast<database::DistributedGraphDb *>(&db.db())) {
      bfs_subcursor_clients_ = &distributed_db->bfs_subcursor_clients();
    }
    CHECK(bfs_subcursor_clients_);
    subcursor_ids_ = bfs_subcursor_clients_->CreateBfsSubcursors(
        db_.transaction_id(), self_.direction(), self_.edge_types(),
        self_.graph_view());
    bfs_subcursor_clients_->RegisterSubcursors(subcursor_ids_);
    VLOG(10) << "BFS subcursors initialized";
    pull_pos_ = subcursor_ids_.end();
  }

  ~DistributedExpandBfsCursor() {
    VLOG(10) << "Removing BFS subcursors";
    bfs_subcursor_clients_->RemoveBfsSubcursors(subcursor_ids_);
  }

  bool Pull(Frame &frame, Context &context) override {
    // TODO(mtomic): lambda filtering in distributed
    if (self_.filter_lambda_.expression) {
      throw utils::NotYetImplemented("lambda filtering in distributed BFS");
    }

    // Evaluator for the filtering condition and expansion depth.
    ExpressionEvaluator evaluator(frame, &context, self_.graph_view_);

    while (true) {
      TypedValue last_vertex;

      if (!skip_rest_) {
        if (current_depth_ >= lower_bound_) {
          for (; pull_pos_ != subcursor_ids_.end(); ++pull_pos_) {
            auto vertex = bfs_subcursor_clients_->Pull(pull_pos_->first,
                                                       pull_pos_->second, &db_);
            if (vertex) {
              last_vertex = *vertex;
              SwitchAccessor(last_vertex.ValueVertex(), self_.graph_view_);
              break;
            }
            VLOG(10) << "Nothing to pull from " << pull_pos_->first;
          }
        }

        if (last_vertex.IsVertex()) {
          // Handle existence flag
          if (self_.existing_node_) {
            TypedValue &node = frame[self_.node_symbol_];
            // Due to optional matching the existing node could be null
            if (node.IsNull() || (node != last_vertex).ValueBool()) continue;
            // There is no point in traversing the rest of the graph because
            // BFS can find only one path to a certain node.
            skip_rest_ = true;
          } else {
            frame[self_.node_symbol_] = last_vertex;
          }

          VLOG(10) << "Expanded to vertex: " << last_vertex;

          // Reconstruct path
          std::vector<TypedValue> edges;

          // During path reconstruction, edges crossing worker boundary are
          // obtained from edge owner to reduce network traffic. If the last
          // worker queried for its path segment owned the crossing edge,
          // `current_vertex_addr` will be set. Otherwise, `current_edge_addr`
          // will be set.
          std::experimental::optional<storage::VertexAddress>
              current_vertex_addr = last_vertex.ValueVertex().GlobalAddress();
          std::experimental::optional<storage::EdgeAddress> current_edge_addr;

          while (true) {
            DCHECK(static_cast<bool>(current_edge_addr) ^
                   static_cast<bool>(current_vertex_addr))
                << "Exactly one of `current_edge_addr` or "
                   "`current_vertex_addr` should be set during path "
                   "reconstruction";
            auto ret = current_edge_addr
                           ? bfs_subcursor_clients_->ReconstructPath(
                                 subcursor_ids_, *current_edge_addr, &db_)
                           : bfs_subcursor_clients_->ReconstructPath(
                                 subcursor_ids_, *current_vertex_addr, &db_);
            edges.insert(edges.end(), ret.edges.begin(), ret.edges.end());
            current_vertex_addr = ret.next_vertex;
            current_edge_addr = ret.next_edge;
            if (!current_vertex_addr && !current_edge_addr) break;
          }
          std::reverse(edges.begin(), edges.end());
          for (auto &edge : edges)
            SwitchAccessor(edge.ValueEdge(), self_.graph_view_);
          frame[self_.edge_symbol_] = std::move(edges);
          return true;
        }

        // We're done pulling for this level
        pull_pos_ = subcursor_ids_.begin();

        // Try to expand again
        if (current_depth_ < upper_bound_) {
          VLOG(10) << "Trying to expand again...";
          current_depth_++;
          bfs_subcursor_clients_->PrepareForExpand(subcursor_ids_, false);
          if (bfs_subcursor_clients_->ExpandLevel(subcursor_ids_)) {
            continue;
          }
        }
      }

      VLOG(10) << "Trying to get a new source...";
      // We're done with this source, try getting a new one
      if (!input_cursor_->Pull(frame, context)) return false;

      auto vertex_value = frame[self_.input_symbol_];

      // It is possible that the vertex is Null due to optional matching.
      if (vertex_value.IsNull()) continue;

      auto vertex = vertex_value.ValueVertex();
      lower_bound_ = self_.lower_bound_
                         ? EvaluateInt(evaluator, self_.lower_bound_,
                                       "Min depth in breadth-first expansion")
                         : 1;
      upper_bound_ = self_.upper_bound_
                         ? EvaluateInt(evaluator, self_.upper_bound_,
                                       "Max depth in breadth-first expansion")
                         : std::numeric_limits<int>::max();
      skip_rest_ = false;

      if (upper_bound_ < 1) {
        throw QueryRuntimeException(
            "Max depth in breadth-first expansion must be at least 1");
      }

      VLOG(10) << "Starting BFS from " << vertex << " with limits "
               << lower_bound_ << ".." << upper_bound_;
      bfs_subcursor_clients_->PrepareForExpand(subcursor_ids_, true);
      bfs_subcursor_clients_->SetSource(subcursor_ids_, vertex.GlobalAddress());
      current_depth_ = 1;
    }
  }

  void Reset() override {
    bfs_subcursor_clients_->ResetSubcursors(subcursor_ids_);
    pull_pos_ = subcursor_ids_.end();
  }

 private:
  const ExpandVariable &self_;
  database::GraphDbAccessor &db_;
  distributed::BfsRpcClients *bfs_subcursor_clients_{nullptr};
  const std::unique_ptr<query::plan::Cursor> input_cursor_;

  // Depth bounds. Calculated on each pull from the input, the initial value
  // is irrelevant.
  int lower_bound_{-1};
  int upper_bound_{-1};

  // When set to true, expansion is restarted from a new source.
  bool skip_rest_{false};

  // Current depth. Reset for each new expansion, the initial value is
  // irrelevant.
  int current_depth_{-1};

  // Map from worker IDs to their corresponding subcursors.
  std::unordered_map<int16_t, int64_t> subcursor_ids_;

  // Next worker the master should try pulling from.
  std::unordered_map<int16_t, int64_t>::iterator pull_pos_;
};
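
// Overview (added commentary, derived from the constructor and Pull() above;
// argument names are only placeholders): one BFS level in the distributed
// cursor proceeds roughly as
//
//   bfs_subcursor_clients_->PrepareForExpand(subcursor_ids_, clear);
//   bfs_subcursor_clients_->SetSource(subcursor_ids_, source_address);  // new source only
//   bfs_subcursor_clients_->ExpandLevel(subcursor_ids_);
//   // then each worker's subcursor is Pull()-ed until one yields a vertex,
//   // and ReconstructPath() follows ownership boundaries to collect the edges.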

class ExpandWeightedShortestPathCursor : public query::plan::Cursor {
 public:
  ExpandWeightedShortestPathCursor(const ExpandVariable &self,
                                   database::GraphDbAccessor &db)
      : self_(self), input_cursor_(self_.input_->MakeCursor(db)) {}

  bool Pull(Frame &frame, Context &context) override {
    ExpressionEvaluator evaluator(frame, &context, self_.graph_view_);
    auto create_state = [this](VertexAccessor vertex, int depth) {
      return std::make_pair(vertex, upper_bound_set_ ? depth : 0);
    };

    // For the given (edge, vertex, weight, depth) tuple, checks if the
    // expansion satisfies the "where" condition. If so, places it in the
    // priority queue.
    auto expand_pair = [this, &evaluator, &frame, &create_state](
        EdgeAccessor edge, VertexAccessor vertex, double weight, int depth) {
      SwitchAccessor(edge, self_.graph_view_);
      SwitchAccessor(vertex, self_.graph_view_);

      if (self_.filter_lambda_.expression) {
        frame[self_.filter_lambda_.inner_edge_symbol] = edge;
        frame[self_.filter_lambda_.inner_node_symbol] = vertex;

        if (!EvaluateFilter(evaluator, self_.filter_lambda_.expression)) return;
      }

      frame[self_.weight_lambda_->inner_edge_symbol] = edge;
      frame[self_.weight_lambda_->inner_node_symbol] = vertex;

      TypedValue typed_weight =
          self_.weight_lambda_->expression->Accept(evaluator);

      if (!typed_weight.IsNumeric()) {
        throw QueryRuntimeException("Calculated weight must be numeric, got {}",
                                    typed_weight.type());
      }
      if ((typed_weight < 0).Value<bool>()) {
        throw QueryRuntimeException("Calculated weight can't be negative!");
      }

      auto next_state = create_state(vertex, depth);
      auto next_weight = weight + typed_weight;
      auto found_it = total_cost_.find(next_state);
      if (found_it != total_cost_.end() &&
          found_it->second.Value<double>() <= next_weight.Value<double>())
        return;

      pq_.push({next_weight.Value<double>(), depth + 1, vertex, edge});
    };
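
    // Illustrative note (added commentary): the lambda above is the relaxation
    // step of a Dijkstra-style search. If the target state already has a
    // settled cost in `total_cost_` (say 5.0), a candidate that would reach it
    // at cost 6.0 is discarded right here; otherwise the candidate is pushed
    // onto `pq_`, and any stale duplicates are filtered again when popped (see
    // the `total_cost_.find(current_state)` check below).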

    // Populates the priority queue structure with expansions
    // from the given vertex. skips expansions that don't satisfy
    // the "where" condition.
    auto expand_from_vertex = [this, &expand_pair](VertexAccessor &vertex,
                                                   double weight, int depth) {
      if (self_.direction_ != EdgeAtom::Direction::IN) {
        for (const EdgeAccessor &edge : vertex.out(&self_.edge_types_)) {
          expand_pair(edge, edge.to(), weight, depth);
        }
      }
      if (self_.direction_ != EdgeAtom::Direction::OUT) {
        for (const EdgeAccessor &edge : vertex.in(&self_.edge_types_)) {
          expand_pair(edge, edge.from(), weight, depth);
        }
      }
    };

    while (true) {
      if (pq_.empty()) {
        if (!input_cursor_->Pull(frame, context)) return false;
        auto vertex_value = frame[self_.input_symbol_];
        if (vertex_value.IsNull()) continue;
        auto vertex = vertex_value.Value<VertexAccessor>();
        if (self_.existing_node_) {
          TypedValue &node = frame[self_.node_symbol_];
          // Due to optional matching the existing node could be null.
          // Skip expansion for such nodes.
          if (node.IsNull()) continue;
        }
        SwitchAccessor(vertex, self_.graph_view_);
        if (self_.upper_bound_) {
          upper_bound_ =
              EvaluateInt(evaluator, self_.upper_bound_,
                          "Max depth in weighted shortest path expansion");
          upper_bound_set_ = true;
        } else {
          upper_bound_ = std::numeric_limits<int>::max();
          upper_bound_set_ = false;
        }
        if (upper_bound_ < 1)
          throw QueryRuntimeException(
              "Max depth in weighted shortest path expansion must be greater "
              "than zero");

        // Clear existing data structures.
        previous_.clear();
        total_cost_.clear();
        yielded_vertices_.clear();

        pq_.push({0.0, 0, vertex, std::experimental::nullopt});
        // We are adding the starting vertex to the set of yielded vertices
        // because we don't want to yield paths that end with the starting
        // vertex.
        yielded_vertices_.insert(vertex);
      }

      while (!pq_.empty()) {
        auto current = pq_.top();
        double current_weight = std::get<0>(current);
        int current_depth = std::get<1>(current);
        VertexAccessor current_vertex = std::get<2>(current);
        std::experimental::optional<EdgeAccessor> current_edge =
            std::get<3>(current);
        pq_.pop();

        auto current_state = create_state(current_vertex, current_depth);

        // Check if the vertex has already been processed.
        if (total_cost_.find(current_state) != total_cost_.end()) {
          continue;
        }
        previous_.emplace(current_state, current_edge);
        total_cost_.emplace(current_state, current_weight);

        // Expand only if what we've just expanded is less than max depth.
        if (current_depth < upper_bound_)
          expand_from_vertex(current_vertex, current_weight, current_depth);

        // If we yielded a path for a vertex already, make the expansion but
        // don't return the path again.
        if (yielded_vertices_.find(current_vertex) != yielded_vertices_.end())
          continue;

        // Reconstruct the path.
        auto last_vertex = current_vertex;
        auto last_depth = current_depth;
        std::vector<TypedValue> edge_list{};
        while (true) {
          // Origin_vertex must be in previous.
          const auto &previous_edge =
              previous_.find(create_state(last_vertex, last_depth))->second;
          if (!previous_edge) break;
          last_vertex = previous_edge->from() == last_vertex
                            ? previous_edge->to()
                            : previous_edge->from();
          last_depth--;
          edge_list.push_back(previous_edge.value());
        }

        // Place destination node on the frame, handle existence flag.
        if (self_.existing_node_) {
          TypedValue &node = frame[self_.node_symbol_];
          if ((node != current_vertex).Value<bool>())
            continue;
          else
            // Prevent expanding other paths, because we found the
            // shortest to existing node.
            ClearQueue();
        } else {
          frame[self_.node_symbol_] = current_vertex;
        }

        if (!self_.is_reverse_) {
          // Place edges on the frame in the correct order.
          std::reverse(edge_list.begin(), edge_list.end());
        }
        frame[self_.edge_symbol_] = std::move(edge_list);
        frame[self_.total_weight_.value()] = current_weight;
        yielded_vertices_.insert(current_vertex);
        return true;
      }
    }
  }

  void Reset() override {
    input_cursor_->Reset();
    previous_.clear();
    total_cost_.clear();
    yielded_vertices_.clear();
    ClearQueue();
  }

 private:
  const ExpandVariable &self_;
  const std::unique_ptr<query::plan::Cursor> input_cursor_;

  // Upper bound on the path length.
  int upper_bound_{-1};
  bool upper_bound_set_{false};

  struct WspStateHash {
    size_t operator()(const std::pair<VertexAccessor, int> &key) const {
      return utils::HashCombine<VertexAccessor, int>{}(key.first, key.second);
    }
  };
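
  // Added commentary: when an upper bound is given, `create_state` keys the
  // maps by (vertex, depth) instead of by the vertex alone. With a depth limit
  // the cheapest way to reach a vertex is not always the most useful one: a
  // slightly costlier but shorter prefix may still be extendable within the
  // remaining depth budget, so each depth gets its own entry. Without an upper
  // bound the depth component is fixed to 0 and the state collapses to the
  // vertex.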

  // Maps a (vertex, depth) state to the total weight of the best path that
  // reached it during expansion.
  std::unordered_map<std::pair<VertexAccessor, int>, TypedValue, WspStateHash>
      total_cost_;

  // Maps a (vertex, depth) state to the edge used to reach it.
  std::unordered_map<std::pair<VertexAccessor, int>,
                     std::experimental::optional<EdgeAccessor>, WspStateHash>
      previous_;

  // Keeps track of vertices for which we yielded a path already.
  std::unordered_set<VertexAccessor> yielded_vertices_;

  // Priority queue comparator. Keep lowest weight on top of the queue.
  class PriorityQueueComparator {
   public:
    bool operator()(
        const std::tuple<double, int, VertexAccessor,
                         std::experimental::optional<EdgeAccessor>> &lhs,
        const std::tuple<double, int, VertexAccessor,
                         std::experimental::optional<EdgeAccessor>> &rhs) {
      return std::get<0>(lhs) > std::get<0>(rhs);
    }
  };

  std::priority_queue<
      std::tuple<double, int, VertexAccessor,
                 std::experimental::optional<EdgeAccessor>>,
      std::vector<std::tuple<double, int, VertexAccessor,
                             std::experimental::optional<EdgeAccessor>>>,
      PriorityQueueComparator>
      pq_;

  void ClearQueue() {
    while (!pq_.empty()) pq_.pop();
  }
};
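
// Illustrative usage (added commentary; the exact query syntax shown is an
// assumption, not taken from this file): this cursor serves weighted shortest
// path expansions along the lines of
//   MATCH (a {id: 0})-[e *wShortest 10 (e, n | e.weight) total_weight]->(b)
//   RETURN b, total_weight;
// The `(e, n | e.weight)` lambda corresponds to weight_lambda_, the optional
// depth limit to upper_bound_, and `total_weight` to the total_weight_ symbol
// written to the frame just before returning.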

std::unique_ptr<Cursor> ExpandVariable::MakeCursor(
    database::GraphDbAccessor &db) const {
  if (type_ == EdgeAtom::Type::BREADTH_FIRST) {
    if (db.db().type() == database::GraphDb::Type::SINGLE_NODE) {
      return std::make_unique<ExpandBfsCursor>(*this, db);
    } else {
      return std::make_unique<DistributedExpandBfsCursor>(*this, db);
    }
  } else if (type_ == EdgeAtom::Type::WEIGHTED_SHORTEST_PATH) {
    return std::make_unique<ExpandWeightedShortestPathCursor>(*this, db);
  } else {
    return std::make_unique<ExpandVariableCursor>(*this, db);
  }
}

class ConstructNamedPathCursor : public Cursor {
 public:
  ConstructNamedPathCursor(const ConstructNamedPath &self,
                           database::GraphDbAccessor &db)
      : self_(self), input_cursor_(self_.input()->MakeCursor(db)) {}

  bool Pull(Frame &frame, Context &context) override {
    if (!input_cursor_->Pull(frame, context)) return false;

    auto symbol_it = self_.path_elements().begin();
    DCHECK(symbol_it != self_.path_elements().end())
        << "Named path must contain at least one node";

    TypedValue start_vertex = frame[*symbol_it++];

    // In an OPTIONAL MATCH everything could be Null.
    if (start_vertex.IsNull()) {
      frame[self_.path_symbol()] = TypedValue::Null;
      return true;
    }

    DCHECK(start_vertex.IsVertex())
        << "First named path element must be a vertex";
    query::Path path(start_vertex.ValueVertex());

    // If the last path element symbol was for an edge list, then the next
    // symbol is a vertex and it should not append to the path because the
    // expansion already did it.
    bool last_was_edge_list = false;

    for (; symbol_it != self_.path_elements().end(); symbol_it++) {
      TypedValue expansion = frame[*symbol_it];
      // We can have Null (OPTIONAL MATCH), a vertex, an edge, or an edge
      // list (variable expand or BFS).
      switch (expansion.type()) {
        case TypedValue::Type::Null:
          frame[self_.path_symbol()] = TypedValue::Null;
          return true;
        case TypedValue::Type::Vertex:
          if (!last_was_edge_list) path.Expand(expansion.ValueVertex());
          last_was_edge_list = false;
          break;
        case TypedValue::Type::Edge:
          path.Expand(expansion.ValueEdge());
          break;
        case TypedValue::Type::List: {
          last_was_edge_list = true;
          // We need to expand all edges in the list and intermediary
          // vertices.
          const std::vector<TypedValue> &edges = expansion.ValueList();
          for (const auto &edge_value : edges) {
            const EdgeAccessor &edge = edge_value.ValueEdge();
            const VertexAccessor from = edge.from();
            if (path.vertices().back() == from)
              path.Expand(edge, edge.to());
            else
              path.Expand(edge, from);
          }
          break;
        }
        default:
          LOG(FATAL) << "Unsupported type in named path construction";
          break;
      }
    }

    frame[self_.path_symbol()] = path;
    return true;
  }

  void Reset() override { input_cursor_->Reset(); }

 private:
  const ConstructNamedPath self_;
  const std::unique_ptr<Cursor> input_cursor_;
};
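
// Illustrative note (added commentary): for a query such as
//   MATCH p = (a)-[r*]->(b) RETURN p
// `path_elements()` yields the symbols [a, r, b]. The cursor above starts the
// path at the vertex bound to `a`, appends every edge (and the implied
// intermediary vertices) from the edge list bound to `r`, and skips the
// explicit append of `b` because `last_was_edge_list` is set, the expansion
// having already added that vertex.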

ACCEPT_WITH_INPUT(ConstructNamedPath)

std::unique_ptr<Cursor> ConstructNamedPath::MakeCursor(
    database::GraphDbAccessor &db) const {
  return std::make_unique<ConstructNamedPathCursor>(*this, db);
}

std::vector<Symbol> ConstructNamedPath::ModifiedSymbols(
    const SymbolTable &table) const {
  auto symbols = input_->ModifiedSymbols(table);
  symbols.emplace_back(path_symbol_);
  return symbols;
}

Filter::Filter(const std::shared_ptr<LogicalOperator> &input,
               Expression *expression)
    : input_(input ? input : std::make_shared<Once>()),
      expression_(expression) {}

ACCEPT_WITH_INPUT(Filter)

std::unique_ptr<Cursor> Filter::MakeCursor(
    database::GraphDbAccessor &db) const {
  return std::make_unique<FilterCursor>(*this, db);
}

std::vector<Symbol> Filter::ModifiedSymbols(const SymbolTable &table) const {
  return input_->ModifiedSymbols(table);
}

Filter::FilterCursor::FilterCursor(const Filter &self,
                                   database::GraphDbAccessor &db)
    : self_(self), input_cursor_(self_.input_->MakeCursor(db)) {}

bool Filter::FilterCursor::Pull(Frame &frame, Context &context) {
  // Like all filters, newly set values should not affect filtering of old
  // nodes and edges.
  ExpressionEvaluator evaluator(frame, &context, GraphView::OLD);
  while (input_cursor_->Pull(frame, context)) {
    if (EvaluateFilter(evaluator, self_.expression_)) return true;
  }
  return false;
}
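
// Added commentary: because the evaluator above uses GraphView::OLD, filtering
// is done against the state of vertices and edges before the updates made by
// the current query part, which is what the comment in Pull() refers to.
// Produce, by contrast, evaluates with GraphView::NEW so that returned values
// reflect the latest writes.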
|
|
|
|
|
2017-04-24 16:16:53 +08:00
|
|
|
void Filter::FilterCursor::Reset() { input_cursor_->Reset(); }
|
|
|
|
|
2017-04-11 18:29:20 +08:00
|
|
|
Produce::Produce(const std::shared_ptr<LogicalOperator> &input,
|
2017-11-09 20:46:37 +08:00
|
|
|
const std::vector<NamedExpression *> &named_expressions)
|
2017-04-21 16:57:53 +08:00
|
|
|
: input_(input ? input : std::make_shared<Once>()),
|
|
|
|
named_expressions_(named_expressions) {}
|
2017-03-30 17:15:57 +08:00
|
|
|
|
2017-04-24 16:16:53 +08:00
|
|
|
ACCEPT_WITH_INPUT(Produce)
|
2017-03-30 17:15:57 +08:00
|
|
|
|
2018-01-12 22:17:04 +08:00
|
|
|
std::unique_ptr<Cursor> Produce::MakeCursor(
|
|
|
|
database::GraphDbAccessor &db) const {
|
2017-03-30 17:15:57 +08:00
|
|
|
return std::make_unique<ProduceCursor>(*this, db);
|
|
|
|
}
|
|
|
|
|
2017-09-19 22:58:22 +08:00
|
|
|
std::vector<Symbol> Produce::OutputSymbols(
|
|
|
|
const SymbolTable &symbol_table) const {
|
2017-04-26 22:12:39 +08:00
|
|
|
std::vector<Symbol> symbols;
|
|
|
|
for (const auto &named_expr : named_expressions_) {
|
|
|
|
symbols.emplace_back(symbol_table.at(*named_expr));
|
|
|
|
}
|
|
|
|
return symbols;
|
|
|
|
}
|
|
|
|
|
2018-02-20 18:08:43 +08:00
|
|
|
std::vector<Symbol> Produce::ModifiedSymbols(const SymbolTable &table) const {
|
|
|
|
return OutputSymbols(table);
|
|
|
|
}
|
|
|
|
|
2018-01-12 22:17:04 +08:00
|
|
|
Produce::ProduceCursor::ProduceCursor(const Produce &self,
|
|
|
|
database::GraphDbAccessor &db)
|
2018-06-28 22:04:29 +08:00
|
|
|
: self_(self), input_cursor_(self_.input_->MakeCursor(db)) {}
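// For every input row, Pull (below) evaluates each named expression against
// the NEW graph view; the evaluator is expected to write each result to the
// frame under the expression's symbol (the same symbols that OutputSymbols
// reports above), yielding exactly one output row per input row.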
|
2017-04-24 16:16:53 +08:00
|
|
|
|
2017-09-13 16:27:12 +08:00
|
|
|
bool Produce::ProduceCursor::Pull(Frame &frame, Context &context) {
|
|
|
|
if (input_cursor_->Pull(frame, context)) {
|
2017-04-21 16:57:53 +08:00
|
|
|
// Produce should always yield the latest results.
|
2018-06-27 17:08:21 +08:00
|
|
|
ExpressionEvaluator evaluator(frame, &context, GraphView::NEW);
|
2017-03-30 17:15:57 +08:00
|
|
|
for (auto named_expr : self_.named_expressions_)
|
|
|
|
named_expr->Accept(evaluator);
|
|
|
|
return true;
|
2017-04-21 16:57:53 +08:00
|
|
|
}
|
|
|
|
return false;
|
2017-03-30 17:15:57 +08:00
|
|
|
}
|
|
|
|
|
2017-04-24 16:16:53 +08:00
|
|
|
void Produce::ProduceCursor::Reset() { input_cursor_->Reset(); }
|
|
|
|
|
2017-03-30 17:15:57 +08:00
|
|
|
Delete::Delete(const std::shared_ptr<LogicalOperator> &input_,
|
|
|
|
const std::vector<Expression *> &expressions, bool detach_)
|
|
|
|
: input_(input_), expressions_(expressions), detach_(detach_) {}
|
|
|
|
|
2017-04-24 16:16:53 +08:00
|
|
|
ACCEPT_WITH_INPUT(Delete)
|
2017-03-30 17:15:57 +08:00
|
|
|
|
2018-01-12 22:17:04 +08:00
|
|
|
std::unique_ptr<Cursor> Delete::MakeCursor(
|
|
|
|
database::GraphDbAccessor &db) const {
|
2017-03-30 17:15:57 +08:00
|
|
|
return std::make_unique<DeleteCursor>(*this, db);
|
|
|
|
}
|
|
|
|
|
2018-02-20 18:08:43 +08:00
|
|
|
std::vector<Symbol> Delete::ModifiedSymbols(const SymbolTable &table) const {
|
|
|
|
return input_->ModifiedSymbols(table);
|
|
|
|
}
|
|
|
|
|
2018-01-12 22:17:04 +08:00
|
|
|
Delete::DeleteCursor::DeleteCursor(const Delete &self,
|
|
|
|
database::GraphDbAccessor &db)
|
2017-03-30 17:15:57 +08:00
|
|
|
: self_(self), db_(db), input_cursor_(self_.input_->MakeCursor(db)) {}
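// Deletion order matters: all expressions are evaluated first against the
// NEW view, every Edge result is removed next, and only then are Vertex
// results removed. Illustrative (not taken from this file) Cypher-level
// example of why: in `MATCH (n)-[r]->(m) DELETE r, n, m`, removing r first
// lets the plain (non-detach) removal of n and m succeed when r was their
// only remaining edge.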
|
|
|
|
|
2017-09-13 16:27:12 +08:00
|
|
|
bool Delete::DeleteCursor::Pull(Frame &frame, Context &context) {
|
|
|
|
if (!input_cursor_->Pull(frame, context)) return false;
|
2017-03-30 17:15:57 +08:00
|
|
|
|
2017-10-05 17:25:52 +08:00
|
|
|
// Delete should get the latest information; this way it is also possible
// to delete newly added nodes and edges.
|
2018-06-27 17:08:21 +08:00
|
|
|
ExpressionEvaluator evaluator(frame, &context, GraphView::NEW);
|
2017-04-14 21:03:18 +08:00
|
|
|
// Collect the expression results so edges can get deleted before vertices.
// This is necessary because an edge that gets deleted could block vertex
// deletion.
|
|
|
|
std::vector<TypedValue> expression_results;
|
|
|
|
expression_results.reserve(self_.expressions_.size());
|
2017-03-30 17:15:57 +08:00
|
|
|
for (Expression *expression : self_.expressions_) {
|
2017-05-16 16:55:02 +08:00
|
|
|
expression_results.emplace_back(expression->Accept(evaluator));
|
2017-04-14 21:03:18 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
// delete edges first
|
|
|
|
for (TypedValue &expression_result : expression_results)
|
|
|
|
if (expression_result.type() == TypedValue::Type::Edge)
|
2017-08-09 21:36:01 +08:00
|
|
|
db_.RemoveEdge(expression_result.Value<EdgeAccessor>());
|
2017-04-14 21:03:18 +08:00
|
|
|
|
|
|
|
// delete vertices
|
|
|
|
for (TypedValue &expression_result : expression_results)
|
|
|
|
switch (expression_result.type()) {
|
|
|
|
case TypedValue::Type::Vertex: {
|
|
|
|
VertexAccessor &va = expression_result.Value<VertexAccessor>();
|
|
|
|
va.SwitchNew();  // necessary because an edge deletion could have updated the vertex
|
2017-03-30 17:15:57 +08:00
|
|
|
if (self_.detach_)
|
2017-08-09 21:36:01 +08:00
|
|
|
db_.DetachRemoveVertex(va);
|
|
|
|
else if (!db_.RemoveVertex(va))
|
2018-02-28 17:36:48 +08:00
|
|
|
throw RemoveAttachedVertexException();
|
2017-03-30 17:15:57 +08:00
|
|
|
break;
|
2017-04-14 21:03:18 +08:00
|
|
|
}
|
2017-05-03 21:55:08 +08:00
|
|
|
|
2017-10-05 17:25:52 +08:00
|
|
|
// Skip Edges (already deleted) and Nulls (can occur in optional match).
|
2017-03-30 17:15:57 +08:00
|
|
|
case TypedValue::Type::Edge:
|
2017-05-03 21:55:08 +08:00
|
|
|
case TypedValue::Type::Null:
|
2017-03-30 17:15:57 +08:00
|
|
|
break;
|
2017-04-14 21:03:18 +08:00
|
|
|
// check we're not trying to delete anything except vertices and edges
|
2017-03-30 17:15:57 +08:00
|
|
|
default:
|
2017-06-12 21:12:31 +08:00
|
|
|
throw QueryRuntimeException("Can only delete edges and vertices");
|
2017-03-30 17:15:57 +08:00
|
|
|
}
|
2017-04-14 21:03:18 +08:00
|
|
|
|
2017-03-30 17:15:57 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2017-04-24 16:16:53 +08:00
|
|
|
void Delete::DeleteCursor::Reset() { input_cursor_->Reset(); }
|
|
|
|
|
2017-04-11 18:29:20 +08:00
|
|
|
SetProperty::SetProperty(const std::shared_ptr<LogicalOperator> &input,
|
2017-03-30 17:15:57 +08:00
|
|
|
PropertyLookup *lhs, Expression *rhs)
|
|
|
|
: input_(input), lhs_(lhs), rhs_(rhs) {}
|
|
|
|
|
2017-04-24 16:16:53 +08:00
|
|
|
ACCEPT_WITH_INPUT(SetProperty)
|
2017-03-30 17:15:57 +08:00
|
|
|
|
2018-01-12 22:17:04 +08:00
|
|
|
std::unique_ptr<Cursor> SetProperty::MakeCursor(
|
|
|
|
database::GraphDbAccessor &db) const {
|
2017-03-30 17:15:57 +08:00
|
|
|
return std::make_unique<SetPropertyCursor>(*this, db);
|
|
|
|
}
|
|
|
|
|
2018-02-20 18:08:43 +08:00
|
|
|
std::vector<Symbol> SetProperty::ModifiedSymbols(
|
|
|
|
const SymbolTable &table) const {
|
|
|
|
return input_->ModifiedSymbols(table);
|
|
|
|
}
|
|
|
|
|
2017-04-11 18:29:20 +08:00
|
|
|
SetProperty::SetPropertyCursor::SetPropertyCursor(const SetProperty &self,
|
2018-01-12 22:17:04 +08:00
|
|
|
database::GraphDbAccessor &db)
|
2018-06-28 22:04:29 +08:00
|
|
|
: self_(self), input_cursor_(self.input_->MakeCursor(db)) {}
|
2017-03-30 17:15:57 +08:00
|
|
|
|
2017-09-13 16:27:12 +08:00
|
|
|
bool SetProperty::SetPropertyCursor::Pull(Frame &frame, Context &context) {
|
|
|
|
if (!input_cursor_->Pull(frame, context)) return false;
|
2017-03-30 17:15:57 +08:00
|
|
|
|
2017-04-03 20:32:29 +08:00
|
|
|
// Set, just like Create, needs to see the latest changes.
|
2018-06-27 17:08:21 +08:00
|
|
|
ExpressionEvaluator evaluator(frame, &context, GraphView::NEW);
|
2017-05-16 16:55:02 +08:00
|
|
|
TypedValue lhs = self_.lhs_->expression_->Accept(evaluator);
|
|
|
|
TypedValue rhs = self_.rhs_->Accept(evaluator);
|
2017-03-30 17:15:57 +08:00
|
|
|
|
|
|
|
switch (lhs.type()) {
|
|
|
|
case TypedValue::Type::Vertex:
|
2017-06-12 21:12:31 +08:00
|
|
|
PropsSetChecked(lhs.Value<VertexAccessor>(), self_.lhs_->property_, rhs);
|
2017-03-30 17:15:57 +08:00
|
|
|
break;
|
|
|
|
case TypedValue::Type::Edge:
|
2017-06-12 21:12:31 +08:00
|
|
|
PropsSetChecked(lhs.Value<EdgeAccessor>(), self_.lhs_->property_, rhs);
|
2017-03-30 17:15:57 +08:00
|
|
|
break;
|
2017-05-17 18:15:24 +08:00
|
|
|
case TypedValue::Type::Null:
|
|
|
|
// Skip setting properties on Null (can occur in optional match).
|
|
|
|
break;
|
2017-08-24 06:13:26 +08:00
|
|
|
case TypedValue::Type::Map:
|
2017-10-05 17:25:52 +08:00
|
|
|
// Semantically, modifying a map makes sense, but it's not supported due to
// all the copying we do (when PropertyValue -> TypedValue and in
// ExpressionEvaluator). So even though we set a map property here, it is
// never visible to the user and it's not stored.
// TODO: fix the bug described above.
|
2017-03-30 17:15:57 +08:00
|
|
|
default:
|
|
|
|
throw QueryRuntimeException(
|
|
|
|
"Properties can only be set on Vertices and Edges");
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2017-04-24 16:16:53 +08:00
|
|
|
void SetProperty::SetPropertyCursor::Reset() { input_cursor_->Reset(); }
|
|
|
|
|
2017-04-11 18:29:20 +08:00
|
|
|
SetProperties::SetProperties(const std::shared_ptr<LogicalOperator> &input,
|
|
|
|
Symbol input_symbol, Expression *rhs, Op op)
|
2017-03-30 17:15:57 +08:00
|
|
|
: input_(input), input_symbol_(input_symbol), rhs_(rhs), op_(op) {}
|
|
|
|
|
2017-04-24 16:16:53 +08:00
|
|
|
ACCEPT_WITH_INPUT(SetProperties)
|
2017-03-30 17:15:57 +08:00
|
|
|
|
2018-01-12 22:17:04 +08:00
|
|
|
std::unique_ptr<Cursor> SetProperties::MakeCursor(
|
|
|
|
database::GraphDbAccessor &db) const {
|
2017-03-30 17:15:57 +08:00
|
|
|
return std::make_unique<SetPropertiesCursor>(*this, db);
|
|
|
|
}
|
|
|
|
|
2018-02-20 18:08:43 +08:00
|
|
|
std::vector<Symbol> SetProperties::ModifiedSymbols(
|
|
|
|
const SymbolTable &table) const {
|
|
|
|
return input_->ModifiedSymbols(table);
|
|
|
|
}
|
|
|
|
|
2017-04-11 18:29:20 +08:00
|
|
|
SetProperties::SetPropertiesCursor::SetPropertiesCursor(
|
2018-01-12 22:17:04 +08:00
|
|
|
const SetProperties &self, database::GraphDbAccessor &db)
|
2017-03-30 17:15:57 +08:00
|
|
|
: self_(self), db_(db), input_cursor_(self.input_->MakeCursor(db)) {}
|
|
|
|
|
2017-09-13 16:27:12 +08:00
|
|
|
bool SetProperties::SetPropertiesCursor::Pull(Frame &frame, Context &context) {
|
|
|
|
if (!input_cursor_->Pull(frame, context)) return false;
|
2017-03-30 17:15:57 +08:00
|
|
|
|
2017-04-03 20:32:29 +08:00
|
|
|
TypedValue &lhs = frame[self_.input_symbol_];
|
2017-03-30 17:15:57 +08:00
|
|
|
|
2017-04-03 20:32:29 +08:00
|
|
|
// Set, just like Create, needs to see the latest changes.
|
2018-06-27 17:08:21 +08:00
|
|
|
ExpressionEvaluator evaluator(frame, &context, GraphView::NEW);
|
2017-05-16 16:55:02 +08:00
|
|
|
TypedValue rhs = self_.rhs_->Accept(evaluator);
|
2017-03-30 17:15:57 +08:00
|
|
|
|
|
|
|
switch (lhs.type()) {
|
|
|
|
case TypedValue::Type::Vertex:
|
|
|
|
Set(lhs.Value<VertexAccessor>(), rhs);
|
|
|
|
break;
|
|
|
|
case TypedValue::Type::Edge:
|
|
|
|
Set(lhs.Value<EdgeAccessor>(), rhs);
|
|
|
|
break;
|
2017-05-17 18:15:24 +08:00
|
|
|
case TypedValue::Type::Null:
|
|
|
|
// Skip setting properties on Null (can occur in optional match).
|
|
|
|
break;
|
2017-03-30 17:15:57 +08:00
|
|
|
default:
|
|
|
|
throw QueryRuntimeException(
|
|
|
|
"Properties can only be set on Vertices and Edges");
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2017-04-24 16:16:53 +08:00
|
|
|
void SetProperties::SetPropertiesCursor::Reset() { input_cursor_->Reset(); }
|
|
|
|
|
2017-03-30 17:15:57 +08:00
|
|
|
template <typename TRecordAccessor>
|
|
|
|
void SetProperties::SetPropertiesCursor::Set(TRecordAccessor &record,
|
2017-04-24 16:16:53 +08:00
|
|
|
const TypedValue &rhs) const {
|
2017-04-03 20:32:29 +08:00
|
|
|
record.SwitchNew();
|
2018-01-17 17:56:06 +08:00
|
|
|
if (self_.op_ == Op::REPLACE) {
|
|
|
|
try {
|
|
|
|
record.PropsClear();
|
|
|
|
} catch (const RecordDeletedError &) {
|
|
|
|
throw QueryRuntimeException(
|
|
|
|
"Trying to set properties on a deleted graph element.");
|
|
|
|
}
|
|
|
|
}
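// For Op::UPDATE the existing properties are kept and only the keys present
// in rhs are overwritten below; Op::REPLACE (handled above) clears everything
// first. These presumably correspond to the openCypher `SET n += map` and
// `SET n = map` forms respectively -- an assumption based on the operator
// names, not verified here.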
|
2017-03-30 17:15:57 +08:00
|
|
|
|
|
|
|
auto set_props = [&record](const auto &properties) {
|
2018-01-17 17:56:06 +08:00
|
|
|
try {
|
|
|
|
for (const auto &kv : properties) record.PropsSet(kv.first, kv.second);
|
|
|
|
} catch (const RecordDeletedError &) {
|
|
|
|
throw QueryRuntimeException(
|
|
|
|
"Trying to set properties on a deleted graph element.");
|
|
|
|
}
|
2017-03-30 17:15:57 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
switch (rhs.type()) {
|
|
|
|
case TypedValue::Type::Edge:
|
|
|
|
set_props(rhs.Value<EdgeAccessor>().Properties());
|
|
|
|
break;
|
|
|
|
case TypedValue::Type::Vertex:
|
|
|
|
set_props(rhs.Value<VertexAccessor>().Properties());
|
|
|
|
break;
|
|
|
|
case TypedValue::Type::Map: {
|
|
|
|
for (const auto &kv : rhs.Value<std::map<std::string, TypedValue>>())
|
2017-08-09 21:36:01 +08:00
|
|
|
PropsSetChecked(record, db_.Property(kv.first), kv.second);
|
2017-03-30 17:15:57 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
default:
|
|
|
|
throw QueryRuntimeException(
|
|
|
|
"Can only set Vertices, Edges and maps as properties");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-10-05 17:25:52 +08:00
|
|
|
// Explicitly instantiate SetPropertiesCursor::Set for the concrete
// TRecordAccessor types.
|
2017-03-30 17:15:57 +08:00
|
|
|
template void SetProperties::SetPropertiesCursor::Set(
|
2017-04-24 16:16:53 +08:00
|
|
|
RecordAccessor<Vertex> &record, const TypedValue &rhs) const;
|
2017-03-30 17:15:57 +08:00
|
|
|
template void SetProperties::SetPropertiesCursor::Set(
|
2017-04-24 16:16:53 +08:00
|
|
|
RecordAccessor<Edge> &record, const TypedValue &rhs) const;
|
2017-03-30 17:15:57 +08:00
|
|
|
|
2017-04-11 18:29:20 +08:00
|
|
|
SetLabels::SetLabels(const std::shared_ptr<LogicalOperator> &input,
|
|
|
|
Symbol input_symbol,
|
2018-01-16 17:09:15 +08:00
|
|
|
const std::vector<storage::Label> &labels)
|
2017-03-30 17:15:57 +08:00
|
|
|
: input_(input), input_symbol_(input_symbol), labels_(labels) {}
|
|
|
|
|
2017-04-24 16:16:53 +08:00
|
|
|
ACCEPT_WITH_INPUT(SetLabels)
|
2017-03-30 17:15:57 +08:00
|
|
|
|
2018-01-12 22:17:04 +08:00
|
|
|
std::unique_ptr<Cursor> SetLabels::MakeCursor(
|
|
|
|
database::GraphDbAccessor &db) const {
|
2017-03-30 17:15:57 +08:00
|
|
|
return std::make_unique<SetLabelsCursor>(*this, db);
|
|
|
|
}
|
|
|
|
|
2018-02-20 18:08:43 +08:00
|
|
|
std::vector<Symbol> SetLabels::ModifiedSymbols(const SymbolTable &table) const {
|
|
|
|
return input_->ModifiedSymbols(table);
|
|
|
|
}
|
|
|
|
|
2017-04-11 18:29:20 +08:00
|
|
|
SetLabels::SetLabelsCursor::SetLabelsCursor(const SetLabels &self,
|
2018-01-12 22:17:04 +08:00
|
|
|
database::GraphDbAccessor &db)
|
2017-03-30 17:15:57 +08:00
|
|
|
: self_(self), input_cursor_(self.input_->MakeCursor(db)) {}
|
|
|
|
|
2017-09-13 16:27:12 +08:00
|
|
|
bool SetLabels::SetLabelsCursor::Pull(Frame &frame, Context &context) {
|
|
|
|
if (!input_cursor_->Pull(frame, context)) return false;
|
2017-03-30 17:15:57 +08:00
|
|
|
|
2017-04-03 20:32:29 +08:00
|
|
|
TypedValue &vertex_value = frame[self_.input_symbol_];
|
2017-05-17 18:15:24 +08:00
|
|
|
// Skip setting labels on Null (can occur in optional match).
|
|
|
|
if (vertex_value.IsNull()) return true;
|
2017-06-12 21:12:31 +08:00
|
|
|
ExpectType(self_.input_symbol_, vertex_value, TypedValue::Type::Vertex);
|
2017-04-03 20:32:29 +08:00
|
|
|
auto &vertex = vertex_value.Value<VertexAccessor>();
|
|
|
|
vertex.SwitchNew();
|
2018-01-17 17:56:06 +08:00
|
|
|
try {
|
|
|
|
for (auto label : self_.labels_) vertex.add_label(label);
|
|
|
|
} catch (const RecordDeletedError &) {
|
|
|
|
throw QueryRuntimeException("Trying to set labels on a deleted Vertex");
|
|
|
|
}
|
2017-03-30 17:15:57 +08:00
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2017-04-24 16:16:53 +08:00
|
|
|
void SetLabels::SetLabelsCursor::Reset() { input_cursor_->Reset(); }
|
|
|
|
|
2017-04-11 18:29:20 +08:00
|
|
|
RemoveProperty::RemoveProperty(const std::shared_ptr<LogicalOperator> &input,
|
2017-03-30 17:15:57 +08:00
|
|
|
PropertyLookup *lhs)
|
|
|
|
: input_(input), lhs_(lhs) {}
|
|
|
|
|
2017-04-24 16:16:53 +08:00
|
|
|
ACCEPT_WITH_INPUT(RemoveProperty)
|
2017-03-30 17:15:57 +08:00
|
|
|
|
2018-01-12 22:17:04 +08:00
|
|
|
std::unique_ptr<Cursor> RemoveProperty::MakeCursor(
|
|
|
|
database::GraphDbAccessor &db) const {
|
2017-03-30 17:15:57 +08:00
|
|
|
return std::make_unique<RemovePropertyCursor>(*this, db);
|
|
|
|
}
|
|
|
|
|
2018-02-20 18:08:43 +08:00
|
|
|
std::vector<Symbol> RemoveProperty::ModifiedSymbols(
|
|
|
|
const SymbolTable &table) const {
|
|
|
|
return input_->ModifiedSymbols(table);
|
|
|
|
}
|
|
|
|
|
2017-04-11 18:29:20 +08:00
|
|
|
RemoveProperty::RemovePropertyCursor::RemovePropertyCursor(
|
2018-01-12 22:17:04 +08:00
|
|
|
const RemoveProperty &self, database::GraphDbAccessor &db)
|
2018-06-28 22:04:29 +08:00
|
|
|
: self_(self), input_cursor_(self.input_->MakeCursor(db)) {}
|
2017-03-30 17:15:57 +08:00
|
|
|
|
2017-09-13 16:27:12 +08:00
|
|
|
bool RemoveProperty::RemovePropertyCursor::Pull(Frame &frame,
|
|
|
|
Context &context) {
|
|
|
|
if (!input_cursor_->Pull(frame, context)) return false;
|
2017-03-30 17:15:57 +08:00
|
|
|
|
2017-04-03 20:32:29 +08:00
|
|
|
// Remove, just like Delete, needs to see the latest changes.
|
2018-06-27 17:08:21 +08:00
|
|
|
ExpressionEvaluator evaluator(frame, &context, GraphView::NEW);
|
2017-05-16 16:55:02 +08:00
|
|
|
TypedValue lhs = self_.lhs_->expression_->Accept(evaluator);
|
2017-03-30 17:15:57 +08:00
|
|
|
|
|
|
|
switch (lhs.type()) {
|
|
|
|
case TypedValue::Type::Vertex:
|
2018-01-17 17:56:06 +08:00
|
|
|
try {
|
|
|
|
lhs.Value<VertexAccessor>().PropsErase(self_.lhs_->property_);
|
|
|
|
} catch (const RecordDeletedError &) {
|
|
|
|
throw QueryRuntimeException(
|
|
|
|
"Trying to remove properties from a deleted Vertex");
|
|
|
|
}
|
2017-03-30 17:15:57 +08:00
|
|
|
break;
|
|
|
|
case TypedValue::Type::Edge:
|
2018-01-17 17:56:06 +08:00
|
|
|
try {
|
|
|
|
lhs.Value<EdgeAccessor>().PropsErase(self_.lhs_->property_);
|
|
|
|
} catch (const RecordDeletedError &) {
|
|
|
|
throw QueryRuntimeException(
|
|
|
|
"Trying to remove properties from a deleted Edge");
|
|
|
|
}
|
2017-03-30 17:15:57 +08:00
|
|
|
break;
|
2017-05-17 18:15:24 +08:00
|
|
|
case TypedValue::Type::Null:
|
|
|
|
// Skip removing properties on Null (can occur in optional match).
|
|
|
|
break;
|
2017-03-30 17:15:57 +08:00
|
|
|
default:
|
|
|
|
throw QueryRuntimeException(
|
|
|
|
"Properties can only be removed on Vertices and Edges");
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2017-04-24 16:16:53 +08:00
|
|
|
void RemoveProperty::RemovePropertyCursor::Reset() { input_cursor_->Reset(); }
|
|
|
|
|
2017-04-11 18:29:20 +08:00
|
|
|
RemoveLabels::RemoveLabels(const std::shared_ptr<LogicalOperator> &input,
|
|
|
|
Symbol input_symbol,
|
2018-01-16 17:09:15 +08:00
|
|
|
const std::vector<storage::Label> &labels)
|
2017-03-30 17:15:57 +08:00
|
|
|
: input_(input), input_symbol_(input_symbol), labels_(labels) {}
|
|
|
|
|
2017-04-24 16:16:53 +08:00
|
|
|
ACCEPT_WITH_INPUT(RemoveLabels)
|
2017-03-30 17:15:57 +08:00
|
|
|
|
2018-01-12 22:17:04 +08:00
|
|
|
std::unique_ptr<Cursor> RemoveLabels::MakeCursor(
|
|
|
|
database::GraphDbAccessor &db) const {
|
2017-03-30 17:15:57 +08:00
|
|
|
return std::make_unique<RemoveLabelsCursor>(*this, db);
|
|
|
|
}
|
|
|
|
|
2018-02-20 18:08:43 +08:00
|
|
|
std::vector<Symbol> RemoveLabels::ModifiedSymbols(
|
|
|
|
const SymbolTable &table) const {
|
|
|
|
return input_->ModifiedSymbols(table);
|
|
|
|
}
|
|
|
|
|
2018-01-12 22:17:04 +08:00
|
|
|
RemoveLabels::RemoveLabelsCursor::RemoveLabelsCursor(
|
|
|
|
const RemoveLabels &self, database::GraphDbAccessor &db)
|
2017-03-30 17:15:57 +08:00
|
|
|
: self_(self), input_cursor_(self.input_->MakeCursor(db)) {}
|
|
|
|
|
2017-09-13 16:27:12 +08:00
|
|
|
bool RemoveLabels::RemoveLabelsCursor::Pull(Frame &frame, Context &context) {
|
|
|
|
if (!input_cursor_->Pull(frame, context)) return false;
|
2017-03-30 17:15:57 +08:00
|
|
|
|
2017-04-03 20:32:29 +08:00
|
|
|
TypedValue &vertex_value = frame[self_.input_symbol_];
|
2017-05-17 18:15:24 +08:00
|
|
|
// Skip removing labels on Null (can occur in optional match).
|
|
|
|
if (vertex_value.IsNull()) return true;
|
2017-06-12 21:12:31 +08:00
|
|
|
ExpectType(self_.input_symbol_, vertex_value, TypedValue::Type::Vertex);
|
2017-04-03 20:32:29 +08:00
|
|
|
auto &vertex = vertex_value.Value<VertexAccessor>();
|
|
|
|
vertex.SwitchNew();
|
2018-01-17 17:56:06 +08:00
|
|
|
try {
|
|
|
|
for (auto label : self_.labels_) vertex.remove_label(label);
|
|
|
|
} catch (const RecordDeletedError &) {
|
|
|
|
throw QueryRuntimeException(
|
|
|
|
"Trying to remove labels from a deleted Vertex");
|
|
|
|
}
|
2017-03-30 17:15:57 +08:00
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2017-04-24 16:16:53 +08:00
|
|
|
void RemoveLabels::RemoveLabelsCursor::Reset() { input_cursor_->Reset(); }
|
|
|
|
|
2017-04-04 15:04:26 +08:00
|
|
|
template <typename TAccessor>
|
|
|
|
ExpandUniquenessFilter<TAccessor>::ExpandUniquenessFilter(
|
|
|
|
const std::shared_ptr<LogicalOperator> &input, Symbol expand_symbol,
|
|
|
|
const std::vector<Symbol> &previous_symbols)
|
|
|
|
: input_(input),
|
|
|
|
expand_symbol_(expand_symbol),
|
|
|
|
previous_symbols_(previous_symbols) {}
|
|
|
|
|
|
|
|
template <typename TAccessor>
|
2017-04-24 16:16:53 +08:00
|
|
|
ACCEPT_WITH_INPUT(ExpandUniquenessFilter<TAccessor>)
|
2017-04-04 15:04:26 +08:00
|
|
|
|
|
|
|
template <typename TAccessor>
|
|
|
|
std::unique_ptr<Cursor> ExpandUniquenessFilter<TAccessor>::MakeCursor(
|
2018-01-12 22:17:04 +08:00
|
|
|
database::GraphDbAccessor &db) const {
|
2017-04-04 15:04:26 +08:00
|
|
|
return std::make_unique<ExpandUniquenessFilterCursor>(*this, db);
|
|
|
|
}
|
|
|
|
|
2018-02-20 18:08:43 +08:00
|
|
|
template <typename TAccessor>
|
|
|
|
std::vector<Symbol> ExpandUniquenessFilter<TAccessor>::ModifiedSymbols(
|
|
|
|
const SymbolTable &table) const {
|
|
|
|
return input_->ModifiedSymbols(table);
|
|
|
|
}
|
|
|
|
|
2017-04-04 15:04:26 +08:00
|
|
|
template <typename TAccessor>
|
|
|
|
ExpandUniquenessFilter<TAccessor>::ExpandUniquenessFilterCursor::
|
2017-04-11 18:29:20 +08:00
|
|
|
ExpandUniquenessFilterCursor(const ExpandUniquenessFilter &self,
|
2018-01-12 22:17:04 +08:00
|
|
|
database::GraphDbAccessor &db)
|
2017-04-04 15:04:26 +08:00
|
|
|
: self_(self), input_cursor_(self.input_->MakeCursor(db)) {}
|
|
|
|
|
2017-07-20 19:26:54 +08:00
|
|
|
namespace {
|
|
|
|
/**
|
|
|
|
* Returns true if:
|
|
|
|
* - a and b are vertex values and are the same
|
|
|
|
* - a and b are either edge or edge-list values, and there
|
|
|
|
* is at least one matching edge in the two values
|
|
|
|
*/
|
|
|
|
template <typename TAccessor>
|
|
|
|
bool ContainsSame(const TypedValue &a, const TypedValue &b);
|
|
|
|
|
|
|
|
template <>
|
|
|
|
bool ContainsSame<VertexAccessor>(const TypedValue &a, const TypedValue &b) {
|
|
|
|
return a.Value<VertexAccessor>() == b.Value<VertexAccessor>();
|
|
|
|
}
|
|
|
|
|
|
|
|
template <>
|
|
|
|
bool ContainsSame<EdgeAccessor>(const TypedValue &a, const TypedValue &b) {
|
|
|
|
auto compare_to_list = [](const TypedValue &list, const TypedValue &other) {
|
|
|
|
for (const TypedValue &list_elem : list.Value<std::vector<TypedValue>>())
|
|
|
|
if (ContainsSame<EdgeAccessor>(list_elem, other)) return true;
|
|
|
|
return false;
|
|
|
|
};
|
|
|
|
|
|
|
|
if (a.type() == TypedValue::Type::List) return compare_to_list(a, b);
|
|
|
|
if (b.type() == TypedValue::Type::List) return compare_to_list(b, a);
|
|
|
|
|
|
|
|
return a.Value<EdgeAccessor>() == b.Value<EdgeAccessor>();
|
|
|
|
}
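// ContainsSame is the core of the uniqueness check performed by
// ExpandUniquenessFilterCursor::Pull below: an expansion is rejected if the
// newly expanded vertex/edge (or any edge inside an expanded edge list)
// already appears among the previously bound symbols.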
|
2017-09-20 21:28:00 +08:00
|
|
|
} // namespace
|
2017-07-20 19:26:54 +08:00
|
|
|
|
2017-04-04 15:04:26 +08:00
|
|
|
template <typename TAccessor>
|
|
|
|
bool ExpandUniquenessFilter<TAccessor>::ExpandUniquenessFilterCursor::Pull(
|
2017-09-13 16:27:12 +08:00
|
|
|
Frame &frame, Context &context) {
|
2017-04-04 15:04:26 +08:00
|
|
|
auto expansion_ok = [&]() {
|
|
|
|
TypedValue &expand_value = frame[self_.expand_symbol_];
|
|
|
|
for (const auto &previous_symbol : self_.previous_symbols_) {
|
|
|
|
TypedValue &previous_value = frame[previous_symbol];
|
2017-10-05 17:25:52 +08:00
|
|
|
// This shouldn't raise a TypedValueException because the planner makes sure
// these are all of the expected type. In case they are not, an error should
// be raised long before this code is executed.
|
2017-07-20 19:26:54 +08:00
|
|
|
if (ContainsSame<TAccessor>(previous_value, expand_value)) return false;
|
2017-04-04 15:04:26 +08:00
|
|
|
}
|
|
|
|
return true;
|
|
|
|
};
|
|
|
|
|
2017-09-13 16:27:12 +08:00
|
|
|
while (input_cursor_->Pull(frame, context))
|
2017-04-05 21:00:26 +08:00
|
|
|
if (expansion_ok()) return true;
|
2017-04-04 15:04:26 +08:00
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2017-04-24 16:16:53 +08:00
|
|
|
template <typename TAccessor>
|
|
|
|
void ExpandUniquenessFilter<TAccessor>::ExpandUniquenessFilterCursor::Reset() {
|
|
|
|
input_cursor_->Reset();
|
|
|
|
}
|
|
|
|
|
2017-04-04 15:04:26 +08:00
|
|
|
// Instantiations of the ExpandUniquenessFilter template class; these are
// the only two we ever need.
|
|
|
|
template class ExpandUniquenessFilter<VertexAccessor>;
|
|
|
|
template class ExpandUniquenessFilter<EdgeAccessor>;
|
|
|
|
|
2017-04-11 18:29:20 +08:00
|
|
|
Accumulate::Accumulate(const std::shared_ptr<LogicalOperator> &input,
|
2017-04-05 21:00:26 +08:00
|
|
|
const std::vector<Symbol> &symbols, bool advance_command)
|
|
|
|
: input_(input), symbols_(symbols), advance_command_(advance_command) {}
|
2017-04-05 16:34:10 +08:00
|
|
|
|
2017-04-24 16:16:53 +08:00
|
|
|
ACCEPT_WITH_INPUT(Accumulate)
|
|
|
|
|
2018-01-12 22:17:04 +08:00
|
|
|
std::unique_ptr<Cursor> Accumulate::MakeCursor(
|
|
|
|
database::GraphDbAccessor &db) const {
|
2017-04-05 21:00:26 +08:00
|
|
|
return std::make_unique<Accumulate::AccumulateCursor>(*this, db);
|
2017-04-05 16:34:10 +08:00
|
|
|
}
|
|
|
|
|
2018-02-20 18:08:43 +08:00
|
|
|
std::vector<Symbol> Accumulate::ModifiedSymbols(const SymbolTable &) const {
|
|
|
|
return symbols_;
|
|
|
|
}
|
|
|
|
|
2017-04-11 18:29:20 +08:00
|
|
|
Accumulate::AccumulateCursor::AccumulateCursor(const Accumulate &self,
|
2018-01-12 22:17:04 +08:00
|
|
|
database::GraphDbAccessor &db)
|
2017-04-05 21:00:26 +08:00
|
|
|
: self_(self), db_(db), input_cursor_(self.input_->MakeCursor(db)) {}
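// AccumulateCursor is a pipeline barrier: on the first Pull it drains its
// entire input into cache_, optionally advances the command and reconstructs
// the cached TypedValues (so graph elements are re-read in the new command),
// and afterwards replays the cached rows one at a time.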
|
2017-04-05 16:34:10 +08:00
|
|
|
|
2017-09-13 16:27:12 +08:00
|
|
|
bool Accumulate::AccumulateCursor::Pull(Frame &frame, Context &context) {
|
2017-04-05 21:00:26 +08:00
|
|
|
// cache all the input
|
|
|
|
if (!pulled_all_input_) {
|
2017-09-13 16:27:12 +08:00
|
|
|
while (input_cursor_->Pull(frame, context)) {
|
|
|
|
std::vector<TypedValue> row;
|
|
|
|
row.reserve(self_.symbols_.size());
|
2017-04-05 21:00:26 +08:00
|
|
|
for (const Symbol &symbol : self_.symbols_)
|
|
|
|
row.emplace_back(frame[symbol]);
|
|
|
|
cache_.emplace_back(std::move(row));
|
2017-04-05 21:00:26 +08:00
|
|
|
}
|
|
|
|
pulled_all_input_ = true;
|
|
|
|
cache_it_ = cache_.begin();
|
|
|
|
|
|
|
|
if (self_.advance_command_) {
|
2017-08-09 21:36:01 +08:00
|
|
|
db_.AdvanceCommand();
|
2017-04-05 21:00:26 +08:00
|
|
|
for (auto &row : cache_)
|
2018-02-08 20:27:07 +08:00
|
|
|
for (auto &col : row) query::ReconstructTypedValue(col);
|
2017-04-05 21:00:26 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (cache_it_ == cache_.end()) return false;
|
|
|
|
auto row_it = (cache_it_++)->begin();
|
|
|
|
for (const Symbol &symbol : self_.symbols_) frame[symbol] = *row_it++;
|
|
|
|
return true;
|
2017-04-05 16:34:10 +08:00
|
|
|
}
|
|
|
|
|
2017-04-24 16:16:53 +08:00
|
|
|
void Accumulate::AccumulateCursor::Reset() {
|
|
|
|
input_cursor_->Reset();
|
|
|
|
cache_.clear();
|
|
|
|
cache_it_ = cache_.begin();
|
|
|
|
pulled_all_input_ = false;
|
|
|
|
}
|
|
|
|
|
2017-04-06 15:31:02 +08:00
|
|
|
Aggregate::Aggregate(const std::shared_ptr<LogicalOperator> &input,
|
|
|
|
const std::vector<Aggregate::Element> &aggregations,
|
2017-04-11 21:11:48 +08:00
|
|
|
const std::vector<Expression *> &group_by,
|
2017-04-20 21:19:52 +08:00
|
|
|
const std::vector<Symbol> &remember)
|
2017-04-21 16:57:53 +08:00
|
|
|
: input_(input ? input : std::make_shared<Once>()),
|
2017-04-11 21:11:48 +08:00
|
|
|
aggregations_(aggregations),
|
|
|
|
group_by_(group_by),
|
2017-04-20 21:19:52 +08:00
|
|
|
remember_(remember) {}
|
2017-04-06 15:31:02 +08:00
|
|
|
|
2017-04-24 16:16:53 +08:00
|
|
|
ACCEPT_WITH_INPUT(Aggregate)
|
2017-04-06 15:31:02 +08:00
|
|
|
|
2018-01-12 22:17:04 +08:00
|
|
|
std::unique_ptr<Cursor> Aggregate::MakeCursor(
|
|
|
|
database::GraphDbAccessor &db) const {
|
2017-04-11 21:11:48 +08:00
|
|
|
return std::make_unique<AggregateCursor>(*this, db);
|
|
|
|
}
|
|
|
|
|
2018-02-20 18:08:43 +08:00
|
|
|
std::vector<Symbol> Aggregate::ModifiedSymbols(const SymbolTable &) const {
|
|
|
|
auto symbols = remember_;
|
|
|
|
for (const auto &elem : aggregations_) symbols.push_back(elem.output_sym);
|
|
|
|
return symbols;
|
|
|
|
}
|
|
|
|
|
2017-09-19 22:58:22 +08:00
|
|
|
Aggregate::AggregateCursor::AggregateCursor(const Aggregate &self,
|
2018-01-12 22:17:04 +08:00
|
|
|
database::GraphDbAccessor &db)
|
2018-06-28 22:04:29 +08:00
|
|
|
: self_(self), input_cursor_(self_.input_->MakeCursor(db)) {}
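// Aggregation runs in two phases: ProcessAll drains the input, grouping rows
// by the evaluated group_by_ expressions and updating one AggregationValue
// per group, then Pull emits a single row per group, restoring the remembered
// symbols alongside the aggregated values.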
|
2017-04-11 21:11:48 +08:00
|
|
|
|
2017-05-30 15:37:24 +08:00
|
|
|
namespace {
|
|
|
|
/** Returns the default TypedValue for an Aggregation element.
 * This value is valid both for returning when there are no inputs
 * to the aggregation op, and for initializing an aggregation result
 * when there are inputs. */
|
|
|
|
TypedValue DefaultAggregationOpValue(const Aggregate::Element &element) {
|
|
|
|
switch (element.op) {
|
2017-05-30 15:37:24 +08:00
|
|
|
case Aggregation::Op::COUNT:
|
|
|
|
return TypedValue(0);
|
|
|
|
case Aggregation::Op::SUM:
|
|
|
|
case Aggregation::Op::MIN:
|
|
|
|
case Aggregation::Op::MAX:
|
|
|
|
case Aggregation::Op::AVG:
|
|
|
|
return TypedValue::Null;
|
|
|
|
case Aggregation::Op::COLLECT_LIST:
|
2017-05-30 15:37:24 +08:00
|
|
|
return TypedValue(std::vector<TypedValue>());
|
|
|
|
case Aggregation::Op::COLLECT_MAP:
|
|
|
|
return TypedValue(std::map<std::string, TypedValue>());
|
2017-05-30 15:37:24 +08:00
|
|
|
}
|
|
|
|
}
|
2017-09-20 21:28:00 +08:00
|
|
|
} // namespace
|
2017-05-30 15:37:24 +08:00
|
|
|
|
2017-09-13 16:27:12 +08:00
|
|
|
bool Aggregate::AggregateCursor::Pull(Frame &frame, Context &context) {
|
2017-04-11 21:11:48 +08:00
|
|
|
if (!pulled_all_input_) {
|
2017-09-13 16:27:12 +08:00
|
|
|
ProcessAll(frame, context);
|
2017-04-11 21:11:48 +08:00
|
|
|
pulled_all_input_ = true;
|
|
|
|
aggregation_it_ = aggregation_.begin();
|
2017-05-30 15:37:24 +08:00
|
|
|
|
2017-10-05 17:25:52 +08:00
|
|
|
// In case there is no input and no group_bys, we need to return true
// just this once.
|
2017-05-30 15:37:24 +08:00
|
|
|
if (aggregation_.empty() && self_.group_by_.empty()) {
|
|
|
|
// place default aggregation values on the frame
|
|
|
|
for (const auto &elem : self_.aggregations_)
|
|
|
|
frame[elem.output_sym] = DefaultAggregationOpValue(elem);
|
2017-05-30 15:37:24 +08:00
|
|
|
// place null as remember values on the frame
|
|
|
|
for (const Symbol &remember_sym : self_.remember_)
|
|
|
|
frame[remember_sym] = TypedValue::Null;
|
|
|
|
return true;
|
|
|
|
}
|
2017-04-11 21:11:48 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
if (aggregation_it_ == aggregation_.end()) return false;
|
|
|
|
|
|
|
|
// place aggregation values on the frame
|
|
|
|
auto aggregation_values_it = aggregation_it_->second.values_.begin();
|
|
|
|
for (const auto &aggregation_elem : self_.aggregations_)
|
|
|
|
frame[aggregation_elem.output_sym] = *aggregation_values_it++;
|
2017-04-11 21:11:48 +08:00
|
|
|
|
|
|
|
// place remember values on the frame
|
|
|
|
auto remember_values_it = aggregation_it_->second.remember_.begin();
|
|
|
|
for (const Symbol &remember_sym : self_.remember_)
|
|
|
|
frame[remember_sym] = *remember_values_it++;
|
|
|
|
|
|
|
|
aggregation_it_++;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2017-09-13 16:27:12 +08:00
|
|
|
void Aggregate::AggregateCursor::ProcessAll(Frame &frame, Context &context) {
|
2018-06-27 17:08:21 +08:00
|
|
|
ExpressionEvaluator evaluator(frame, &context, GraphView::NEW);
|
2017-09-13 16:27:12 +08:00
|
|
|
while (input_cursor_->Pull(frame, context))
|
|
|
|
ProcessOne(frame, context.symbol_table_, evaluator);
|
2017-04-11 21:11:48 +08:00
|
|
|
|
|
|
|
// calculate AVG aggregations (so far they have only been summed)
|
2017-07-14 19:58:25 +08:00
|
|
|
for (int pos = 0; pos < static_cast<int>(self_.aggregations_.size()); ++pos) {
|
|
|
|
if (self_.aggregations_[pos].op != Aggregation::Op::AVG) continue;
|
2017-04-11 21:11:48 +08:00
|
|
|
for (auto &kv : aggregation_) {
|
|
|
|
AggregationValue &agg_value = kv.second;
|
|
|
|
int count = agg_value.counts_[pos];
|
|
|
|
if (count > 0)
|
|
|
|
agg_value.values_[pos] = agg_value.values_[pos] / static_cast<double>(count);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2017-04-13 22:47:11 +08:00
|
|
|
void Aggregate::AggregateCursor::ProcessOne(Frame &frame,
|
|
|
|
const SymbolTable &symbol_table,
|
|
|
|
ExpressionEvaluator &evaluator) {
|
|
|
|
std::vector<TypedValue> group_by;
|
|
|
|
group_by.reserve(self_.group_by_.size());
|
2017-04-13 22:47:11 +08:00
|
|
|
for (Expression *expression : self_.group_by_) {
|
2017-05-16 16:55:02 +08:00
|
|
|
group_by.emplace_back(expression->Accept(evaluator));
|
2017-04-13 22:47:11 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
AggregationValue &agg_value = aggregation_[group_by];
|
|
|
|
EnsureInitialized(frame, agg_value);
|
|
|
|
Update(frame, symbol_table, evaluator, agg_value);
|
|
|
|
}
|
|
|
|
|
2017-04-11 21:11:48 +08:00
|
|
|
void Aggregate::AggregateCursor::EnsureInitialized(
|
2017-04-24 16:16:53 +08:00
|
|
|
Frame &frame,
|
|
|
|
Aggregate::AggregateCursor::AggregationValue &agg_value) const {
|
2017-04-11 21:11:48 +08:00
|
|
|
if (agg_value.values_.size() > 0) return;
|
|
|
|
|
2017-05-30 15:37:24 +08:00
|
|
|
for (const auto &agg_elem : self_.aggregations_)
|
|
|
|
agg_value.values_.emplace_back(DefaultAggregationOpValue(agg_elem));
|
2017-04-11 21:11:48 +08:00
|
|
|
agg_value.counts_.resize(self_.aggregations_.size(), 0);
|
|
|
|
|
|
|
|
for (const Symbol &remember_sym : self_.remember_)
|
|
|
|
agg_value.remember_.push_back(frame[remember_sym]);
|
|
|
|
}
|
|
|
|
|
|
|
|
void Aggregate::AggregateCursor::Update(
|
2017-07-14 19:58:25 +08:00
|
|
|
Frame &, const SymbolTable &, ExpressionEvaluator &evaluator,
|
2017-04-11 21:11:48 +08:00
|
|
|
Aggregate::AggregateCursor::AggregationValue &agg_value) {
|
2017-10-11 19:19:10 +08:00
|
|
|
DCHECK(self_.aggregations_.size() == agg_value.values_.size())
|
|
|
|
<< "Expected as much AggregationValue.values_ as there are "
|
|
|
|
"aggregations.";
|
|
|
|
DCHECK(self_.aggregations_.size() == agg_value.counts_.size())
|
|
|
|
<< "Expected as much AggregationValue.counts_ as there are "
|
|
|
|
"aggregations.";
|
2017-04-11 21:11:48 +08:00
|
|
|
|
|
|
|
// we iterate over counts, values and aggregation info at the same time
|
|
|
|
auto count_it = agg_value.counts_.begin();
|
|
|
|
auto value_it = agg_value.values_.begin();
|
|
|
|
auto agg_elem_it = self_.aggregations_.begin();
|
|
|
|
for (; count_it < agg_value.counts_.end();
|
|
|
|
count_it++, value_it++, agg_elem_it++) {
|
2017-05-06 23:57:39 +08:00
|
|
|
// COUNT(*) is the only case where the input expression is optional;
// handle it here.
|
|
|
|
auto input_expr_ptr = agg_elem_it->value;
|
2017-05-06 23:57:39 +08:00
|
|
|
if (!input_expr_ptr) {
|
|
|
|
*count_it += 1;
|
|
|
|
*value_it = *count_it;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
2017-05-16 16:55:02 +08:00
|
|
|
TypedValue input_value = input_expr_ptr->Accept(evaluator);
|
2017-04-11 21:11:48 +08:00
|
|
|
|
2017-04-12 21:47:55 +08:00
|
|
|
// Aggregations skip Null input values.
|
2017-04-20 19:31:18 +08:00
|
|
|
if (input_value.IsNull()) continue;
|
|
|
|
const auto &agg_op = agg_elem_it->op;
|
2017-04-11 21:11:48 +08:00
|
|
|
*count_it += 1;
|
|
|
|
if (*count_it == 1) {
|
2017-04-14 23:14:14 +08:00
|
|
|
// first value, nothing to aggregate. check type, set and continue.
|
|
|
|
switch (agg_op) {
|
|
|
|
case Aggregation::Op::MIN:
|
|
|
|
case Aggregation::Op::MAX:
|
2017-05-19 21:49:25 +08:00
|
|
|
*value_it = input_value;
|
2017-04-14 23:14:14 +08:00
|
|
|
EnsureOkForMinMax(input_value);
|
|
|
|
break;
|
|
|
|
case Aggregation::Op::SUM:
|
|
|
|
case Aggregation::Op::AVG:
|
2017-05-19 21:49:25 +08:00
|
|
|
*value_it = input_value;
|
2017-04-14 23:14:14 +08:00
|
|
|
EnsureOkForAvgSum(input_value);
|
|
|
|
break;
|
|
|
|
case Aggregation::Op::COUNT:
|
2017-05-19 21:49:25 +08:00
|
|
|
*value_it = 1;
|
|
|
|
break;
|
|
|
|
case Aggregation::Op::COLLECT_LIST:
|
2017-08-30 21:37:00 +08:00
|
|
|
value_it->Value<std::vector<TypedValue>>().push_back(input_value);
|
|
|
|
break;
|
|
|
|
case Aggregation::Op::COLLECT_MAP:
|
2017-08-30 21:37:00 +08:00
|
|
|
auto key = agg_elem_it->key->Accept(evaluator);
|
|
|
|
if (key.type() != TypedValue::Type::String)
|
|
|
|
throw QueryRuntimeException("Map key must be a string");
|
|
|
|
value_it->Value<std::map<std::string, TypedValue>>().emplace(
|
|
|
|
key.Value<std::string>(), input_value);
|
2017-04-14 23:14:14 +08:00
|
|
|
break;
|
|
|
|
}
|
2017-04-11 21:11:48 +08:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
// aggregation of existing values
|
2017-04-12 21:47:55 +08:00
|
|
|
switch (agg_op) {
|
2017-04-11 21:11:48 +08:00
|
|
|
case Aggregation::Op::COUNT:
|
|
|
|
*value_it = *count_it;
|
|
|
|
break;
|
|
|
|
case Aggregation::Op::MIN: {
|
|
|
|
EnsureOkForMinMax(input_value);
|
2017-06-12 21:12:31 +08:00
|
|
|
try {
|
|
|
|
TypedValue comparison_result = input_value < *value_it;
|
|
|
|
// Since we skip nulls, we either have a valid comparison or an exception
// was just thrown above, so it is safe to assume a bool TypedValue.
|
|
|
|
if (comparison_result.Value<bool>()) *value_it = input_value;
|
|
|
|
} catch (const TypedValueException &) {
|
|
|
|
throw QueryRuntimeException("Unable to get MIN of '{}' and '{}'",
|
|
|
|
input_value.type(), value_it->type());
|
|
|
|
}
|
2017-04-11 21:11:48 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
case Aggregation::Op::MAX: {
|
|
|
|
// All comments as for Op::MIN apply here.
|
|
|
|
EnsureOkForMinMax(input_value);
|
2017-06-12 21:12:31 +08:00
|
|
|
try {
|
|
|
|
TypedValue comparison_result = input_value > *value_it;
|
|
|
|
if (comparison_result.Value<bool>()) *value_it = input_value;
|
|
|
|
} catch (const TypedValueException &) {
|
|
|
|
throw QueryRuntimeException("Unable to get MAX of '{}' and '{}'",
|
|
|
|
input_value.type(), value_it->type());
|
|
|
|
}
|
2017-04-11 21:11:48 +08:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
case Aggregation::Op::AVG:
|
|
|
|
// for averaging we sum first and divide by count once all
|
|
|
|
// the input has been processed
|
|
|
|
case Aggregation::Op::SUM:
|
|
|
|
EnsureOkForAvgSum(input_value);
|
|
|
|
*value_it = *value_it + input_value;
|
|
|
|
break;
|
|
|
|
case Aggregation::Op::COLLECT_LIST:
|
2017-08-30 21:37:00 +08:00
|
|
|
value_it->Value<std::vector<TypedValue>>().push_back(input_value);
|
|
|
|
break;
|
|
|
|
case Aggregation::Op::COLLECT_MAP:
|
2017-08-30 21:37:00 +08:00
|
|
|
auto key = agg_elem_it->key->Accept(evaluator);
|
|
|
|
if (key.type() != TypedValue::Type::String)
|
|
|
|
throw QueryRuntimeException("Map key must be a string");
|
|
|
|
value_it->Value<std::map<std::string, TypedValue>>().emplace(
|
|
|
|
key.Value<std::string>(), input_value);
|
2017-05-19 21:49:25 +08:00
|
|
|
break;
|
2017-04-11 21:11:48 +08:00
|
|
|
} // end switch over Aggregation::Op enum
|
|
|
|
} // end loop over all aggregations
|
|
|
|
}
|
|
|
|
|
2017-04-24 16:16:53 +08:00
|
|
|
void Aggregate::AggregateCursor::Reset() {
|
|
|
|
input_cursor_->Reset();
|
|
|
|
aggregation_.clear();
|
|
|
|
aggregation_it_ = aggregation_.begin();
|
|
|
|
pulled_all_input_ = false;
|
|
|
|
}
|
|
|
|
|
|
|
|
void Aggregate::AggregateCursor::EnsureOkForMinMax(
|
|
|
|
const TypedValue &value) const {
|
2017-04-11 21:11:48 +08:00
|
|
|
switch (value.type()) {
|
|
|
|
case TypedValue::Type::Bool:
|
|
|
|
case TypedValue::Type::Int:
|
|
|
|
case TypedValue::Type::Double:
|
|
|
|
case TypedValue::Type::String:
|
|
|
|
return;
|
|
|
|
default:
|
2017-06-12 21:12:31 +08:00
|
|
|
throw QueryRuntimeException(
|
|
|
|
"Only Bool, Int, Double and String values are allowed in "
|
2017-04-11 21:11:48 +08:00
|
|
|
"MIN and MAX aggregations");
|
|
|
|
}
|
|
|
|
}
|
2017-04-24 16:16:53 +08:00
|
|
|
void Aggregate::AggregateCursor::EnsureOkForAvgSum(
|
|
|
|
const TypedValue &value) const {
|
2017-04-11 21:11:48 +08:00
|
|
|
switch (value.type()) {
|
|
|
|
case TypedValue::Type::Int:
|
|
|
|
case TypedValue::Type::Double:
|
|
|
|
return;
|
|
|
|
default:
|
2017-06-12 21:12:31 +08:00
|
|
|
throw QueryRuntimeException(
|
|
|
|
"Only numeric values allowed in SUM and AVG aggregations");
|
2017-04-11 21:11:48 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
query::plan - Ops use vector instead of list
Summary:
Replaced std::list with std::vector in all plan operators. Performance increase in harness tests is not visible. Defined a custom test:
```
unwind range(0, 1000000) as x
create ({a: tointeger(rand() * 100), b: tointeger(rand() * 100), c: tointeger(rand() * 100), d: tointeger(rand() * 10), e: tointeger(rand() * 10), f: tointeger(rand() * 10)});
match (n) return min(n.a), max(n.b), sum(n.c), n.d, n.e, n.f
match (n) with distinct n.a AS a, n.b AS b, n.c AS c, n.d AS d, n.e AS e, n.f AS f return count(*)
```
In that test performance gains are 9.8% on the aggregation query (mean 0.83s vs 0.92s) and 34% (mean 2.15s vs 3.25s) on the distinct query. I doubt we'll see much on any of the LDBC tests because they don't stress those operators nearly as much.
Reviewers: buda, teon.banek, mislav.bradac
Reviewed By: teon.banek
Subscribers: pullbot
Differential Revision: https://phabricator.memgraph.io/D778
2017-09-12 16:08:58 +08:00
|
|
|
bool TypedValueVectorEqual::operator()(
|
|
|
|
const std::vector<TypedValue> &left,
|
|
|
|
const std::vector<TypedValue> &right) const {
|
2017-10-11 19:19:10 +08:00
|
|
|
DCHECK(left.size() == right.size())
|
|
|
|
<< "TypedValueVector comparison should only be done over vectors "
|
|
|
|
"of the same size";
|
2017-04-11 21:11:48 +08:00
|
|
|
return std::equal(left.begin(), left.end(), right.begin(),
|
|
|
|
TypedValue::BoolEqual{});
|
2017-04-06 15:31:02 +08:00
|
|
|
}
|
|
|
|
|
2017-04-18 21:19:42 +08:00
|
|
|
Skip::Skip(const std::shared_ptr<LogicalOperator> &input,
|
|
|
|
Expression *expression)
|
|
|
|
: input_(input), expression_(expression) {}
|
|
|
|
|
2017-04-24 16:16:53 +08:00
|
|
|
ACCEPT_WITH_INPUT(Skip)
|
2017-04-18 21:19:42 +08:00
|
|
|
|
2018-01-12 22:17:04 +08:00
|
|
|
std::unique_ptr<Cursor> Skip::MakeCursor(database::GraphDbAccessor &db) const {
|
2017-04-18 21:19:42 +08:00
|
|
|
return std::make_unique<SkipCursor>(*this, db);
|
|
|
|
}
|
|
|
|
|
2017-09-19 22:58:22 +08:00
|
|
|
std::vector<Symbol> Skip::OutputSymbols(const SymbolTable &symbol_table) const {
|
2017-04-26 22:12:39 +08:00
|
|
|
// Propagate this to potential Produce.
|
|
|
|
return input_->OutputSymbols(symbol_table);
|
|
|
|
}
|
|
|
|
|
2018-02-20 18:08:43 +08:00
|
|
|
std::vector<Symbol> Skip::ModifiedSymbols(const SymbolTable &table) const {
|
|
|
|
return input_->ModifiedSymbols(table);
|
|
|
|
}
|
|
|
|
|
2018-01-12 22:17:04 +08:00
|
|
|
Skip::SkipCursor::SkipCursor(const Skip &self, database::GraphDbAccessor &db)
|
2018-06-28 22:04:29 +08:00
|
|
|
: self_(self), input_cursor_(self_.input_->MakeCursor(db)) {}
|
2017-04-18 21:19:42 +08:00
|
|
|
|
2017-09-13 16:27:12 +08:00
|
|
|
bool Skip::SkipCursor::Pull(Frame &frame, Context &context) {
|
|
|
|
while (input_cursor_->Pull(frame, context)) {
|
2017-04-18 21:19:42 +08:00
|
|
|
if (to_skip_ == -1) {
|
2018-05-15 19:10:15 +08:00
|
|
|
// First successful pull from the input, evaluate the skip expression. The
|
|
|
|
// skip expression doesn't contain identifiers so graph view parameter is
|
|
|
|
// not important.
|
2018-06-27 17:08:21 +08:00
|
|
|
ExpressionEvaluator evaluator(frame, &context, GraphView::OLD);
|
2017-05-16 16:55:02 +08:00
|
|
|
TypedValue to_skip = self_.expression_->Accept(evaluator);
|
2017-04-18 21:19:42 +08:00
|
|
|
if (to_skip.type() != TypedValue::Type::Int)
|
|
|
|
throw QueryRuntimeException("Result of SKIP expression must be an int");
|
|
|
|
|
|
|
|
to_skip_ = to_skip.Value<int64_t>();
|
|
|
|
if (to_skip_ < 0)
|
|
|
|
throw QueryRuntimeException(
|
|
|
|
"Result of SKIP expression must be greater or equal to zero");
|
|
|
|
}
|
|
|
|
|
|
|
|
if (skipped_++ < to_skip_) continue;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
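// Editor's illustrative sketch, not part of the original operator code: the
// SKIP logic above discards the first `to_skip` rows pulled from the input and
// passes every later row through. With a std::vector<int> standing in for the
// input cursor:
namespace skip_sketch {
inline std::vector<int> SkipFirst(const std::vector<int> &rows,
                                  int64_t to_skip) {
  std::vector<int> out;
  int64_t skipped = 0;
  for (int row : rows) {
    // Mirrors `if (skipped_++ < to_skip_) continue;` in SkipCursor::Pull.
    if (skipped++ < to_skip) continue;
    out.push_back(row);
  }
  return out;  // e.g. SkipFirst({1, 2, 3, 4, 5}, 2) == {3, 4, 5}
}
}  // namespace skip_sketch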
|
|
|
|
|
2017-04-24 16:16:53 +08:00
|
|
|
void Skip::SkipCursor::Reset() {
|
|
|
|
input_cursor_->Reset();
|
|
|
|
to_skip_ = -1;
|
|
|
|
skipped_ = 0;
|
|
|
|
}
|
|
|
|
|
2017-04-18 21:19:42 +08:00
|
|
|
Limit::Limit(const std::shared_ptr<LogicalOperator> &input,
|
|
|
|
Expression *expression)
|
|
|
|
: input_(input), expression_(expression) {}
|
|
|
|
|
2017-04-24 16:16:53 +08:00
|
|
|
ACCEPT_WITH_INPUT(Limit)
|
2017-04-18 21:19:42 +08:00
|
|
|
|
2018-01-12 22:17:04 +08:00
|
|
|
std::unique_ptr<Cursor> Limit::MakeCursor(database::GraphDbAccessor &db) const {
|
2017-04-18 21:19:42 +08:00
|
|
|
return std::make_unique<LimitCursor>(*this, db);
|
|
|
|
}
|
|
|
|
|
2017-09-19 22:58:22 +08:00
|
|
|
std::vector<Symbol> Limit::OutputSymbols(
|
|
|
|
const SymbolTable &symbol_table) const {
|
2017-04-26 22:12:39 +08:00
|
|
|
// Propagate this to potential Produce.
|
|
|
|
return input_->OutputSymbols(symbol_table);
|
|
|
|
}
|
|
|
|
|
2018-02-20 18:08:43 +08:00
|
|
|
std::vector<Symbol> Limit::ModifiedSymbols(const SymbolTable &table) const {
|
|
|
|
return input_->ModifiedSymbols(table);
|
|
|
|
}
|
|
|
|
|
2018-01-12 22:17:04 +08:00
|
|
|
Limit::LimitCursor::LimitCursor(const Limit &self,
|
|
|
|
database::GraphDbAccessor &db)
|
2018-06-28 22:04:29 +08:00
|
|
|
: self_(self), input_cursor_(self_.input_->MakeCursor(db)) {}
|
2017-04-18 21:19:42 +08:00
|
|
|
|
2017-09-13 16:27:12 +08:00
|
|
|
bool Limit::LimitCursor::Pull(Frame &frame, Context &context) {
|
2018-05-15 19:10:15 +08:00
|
|
|
// We need to evaluate the limit expression before the first input Pull
|
|
|
|
// because it might be 0 and thereby we shouldn't Pull from input at all.
|
|
|
|
// We can do this before Pulling from the input because the limit expression
|
|
|
|
// is not allowed to contain any identifiers.
|
2017-04-18 21:19:42 +08:00
|
|
|
if (limit_ == -1) {
|
2018-05-15 19:10:15 +08:00
|
|
|
// Limit expression doesn't contain identifiers so graph view is not
|
|
|
|
// important.
|
2018-06-27 17:08:21 +08:00
|
|
|
ExpressionEvaluator evaluator(frame, &context, GraphView::OLD);
|
2017-05-16 16:55:02 +08:00
|
|
|
TypedValue limit = self_.expression_->Accept(evaluator);
|
2017-04-18 21:19:42 +08:00
|
|
|
if (limit.type() != TypedValue::Type::Int)
|
|
|
|
throw QueryRuntimeException("Result of LIMIT expression must be an int");
|
|
|
|
|
|
|
|
limit_ = limit.Value<int64_t>();
|
|
|
|
if (limit_ < 0)
|
|
|
|
throw QueryRuntimeException(
|
|
|
|
"Result of LIMIT expression must be greater or equal to zero");
|
|
|
|
}
|
|
|
|
|
|
|
|
// check we have not exceeded the limit before pulling
|
2017-04-20 19:31:18 +08:00
|
|
|
if (pulled_++ >= limit_) return false;
|
2017-04-18 21:19:42 +08:00
|
|
|
|
2017-09-13 16:27:12 +08:00
|
|
|
return input_cursor_->Pull(frame, context);
|
2017-04-18 21:19:42 +08:00
|
|
|
}
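// Editor's illustrative sketch, not part of the original operator code: LIMIT
// evaluates its expression before the first input pull (so LIMIT 0 never pulls
// at all) and then stops as soon as `limit` rows have been produced:
namespace limit_sketch {
inline std::vector<int> TakeFirst(const std::vector<int> &rows,
                                  int64_t limit) {
  std::vector<int> out;
  int64_t pulled = 0;
  for (int row : rows) {
    // Mirrors `if (pulled_++ >= limit_) return false;` in LimitCursor::Pull.
    if (pulled++ >= limit) break;
    out.push_back(row);
  }
  return out;  // e.g. TakeFirst({1, 2, 3, 4, 5}, 2) == {1, 2}
}
}  // namespace limit_sketch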
|
|
|
|
|
2017-04-24 16:16:53 +08:00
|
|
|
void Limit::LimitCursor::Reset() {
|
|
|
|
input_cursor_->Reset();
|
|
|
|
limit_ = -1;
|
|
|
|
pulled_ = 0;
|
|
|
|
}
|
|
|
|
|
2017-04-20 19:31:18 +08:00
|
|
|
OrderBy::OrderBy(const std::shared_ptr<LogicalOperator> &input,
|
2017-04-24 19:51:16 +08:00
|
|
|
const std::vector<std::pair<Ordering, Expression *>> &order_by,
|
|
|
|
const std::vector<Symbol> &output_symbols)
|
|
|
|
: input_(input), output_symbols_(output_symbols) {
|
2017-04-20 19:31:18 +08:00
|
|
|
// split the order_by vector into two vectors of orderings and expressions
|
|
|
|
std::vector<Ordering> ordering;
|
|
|
|
ordering.reserve(order_by.size());
|
|
|
|
order_by_.reserve(order_by.size());
|
|
|
|
for (const auto &ordering_expression_pair : order_by) {
|
|
|
|
ordering.emplace_back(ordering_expression_pair.first);
|
|
|
|
order_by_.emplace_back(ordering_expression_pair.second);
|
|
|
|
}
|
2017-09-11 18:05:19 +08:00
|
|
|
compare_ = TypedValueVectorCompare(ordering);
|
2017-04-20 19:31:18 +08:00
|
|
|
}
|
|
|
|
|
2017-04-24 16:16:53 +08:00
|
|
|
ACCEPT_WITH_INPUT(OrderBy)
|
2017-04-20 19:31:18 +08:00
|
|
|
|
2018-01-12 22:17:04 +08:00
|
|
|
std::unique_ptr<Cursor> OrderBy::MakeCursor(
|
|
|
|
database::GraphDbAccessor &db) const {
|
2017-04-20 19:31:18 +08:00
|
|
|
return std::make_unique<OrderByCursor>(*this, db);
|
|
|
|
}
|
|
|
|
|
2017-09-19 22:58:22 +08:00
|
|
|
std::vector<Symbol> OrderBy::OutputSymbols(
|
|
|
|
const SymbolTable &symbol_table) const {
|
2017-04-26 22:12:39 +08:00
|
|
|
// Propagate this to potential Produce.
|
|
|
|
return input_->OutputSymbols(symbol_table);
|
|
|
|
}
|
|
|
|
|
2018-02-20 18:08:43 +08:00
|
|
|
std::vector<Symbol> OrderBy::ModifiedSymbols(const SymbolTable &table) const {
|
|
|
|
return input_->ModifiedSymbols(table);
|
|
|
|
}
|
|
|
|
|
2018-01-12 22:17:04 +08:00
|
|
|
OrderBy::OrderByCursor::OrderByCursor(const OrderBy &self,
|
|
|
|
database::GraphDbAccessor &db)
|
2018-06-28 22:04:29 +08:00
|
|
|
: self_(self), input_cursor_(self_.input_->MakeCursor(db)) {}
|
2017-04-20 19:31:18 +08:00
|
|
|
|
2017-09-13 16:27:12 +08:00
|
|
|
bool OrderBy::OrderByCursor::Pull(Frame &frame, Context &context) {
|
2017-04-20 19:31:18 +08:00
|
|
|
if (!did_pull_all_) {
|
2018-06-27 17:08:21 +08:00
|
|
|
ExpressionEvaluator evaluator(frame, &context, GraphView::OLD);
|
2017-09-13 16:27:12 +08:00
|
|
|
while (input_cursor_->Pull(frame, context)) {
|
2017-04-20 19:31:18 +08:00
|
|
|
// collect the order_by elements
|
2017-09-11 18:05:19 +08:00
|
|
|
std::vector<TypedValue> order_by;
|
|
|
|
order_by.reserve(self_.order_by_.size());
|
2017-04-20 19:31:18 +08:00
|
|
|
for (auto expression_ptr : self_.order_by_) {
|
2017-05-16 16:55:02 +08:00
|
|
|
order_by.emplace_back(expression_ptr->Accept(evaluator));
|
2017-04-20 19:31:18 +08:00
|
|
|
}
|
|
|
|
|
2017-04-24 19:51:16 +08:00
|
|
|
// collect the output elements
|
2017-09-11 18:05:19 +08:00
|
|
|
std::vector<TypedValue> output;
|
|
|
|
output.reserve(self_.output_symbols_.size());
|
2017-04-24 19:51:16 +08:00
|
|
|
for (const Symbol &output_sym : self_.output_symbols_)
|
|
|
|
output.emplace_back(frame[output_sym]);
|
2017-04-20 19:31:18 +08:00
|
|
|
|
2017-09-11 18:05:19 +08:00
|
|
|
cache_.emplace_back(std::move(order_by), std::move(output));
|
2017-04-20 19:31:18 +08:00
|
|
|
}
|
|
|
|
|
|
|
|
std::sort(cache_.begin(), cache_.end(),
|
|
|
|
[this](const auto &pair1, const auto &pair2) {
|
|
|
|
return self_.compare_(pair1.first, pair2.first);
|
|
|
|
});
|
|
|
|
|
|
|
|
did_pull_all_ = true;
|
|
|
|
cache_it_ = cache_.begin();
|
|
|
|
}
|
|
|
|
|
|
|
|
if (cache_it_ == cache_.end()) return false;
|
|
|
|
|
2017-04-24 19:51:16 +08:00
|
|
|
// place the output values on the frame
|
2017-10-11 19:19:10 +08:00
|
|
|
DCHECK(self_.output_symbols_.size() == cache_it_->second.size())
|
|
|
|
<< "Number of values does not match the number of output symbols "
|
|
|
|
"in OrderBy";
|
2017-04-24 19:51:16 +08:00
|
|
|
auto output_sym_it = self_.output_symbols_.begin();
|
|
|
|
for (const TypedValue &output : cache_it_->second)
|
|
|
|
frame[*output_sym_it++] = output;
|
2017-04-20 19:31:18 +08:00
|
|
|
|
|
|
|
cache_it_++;
|
|
|
|
return true;
|
|
|
|
}
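// Editor's illustrative sketch, not part of the original operator code:
// OrderByCursor follows a materialize-sort-stream pattern. The first Pull
// drains the input into a cache of (sort key, output) pairs, sorts the cache
// with the composed comparator, and later Pulls only stream cached rows.
// Reduced here to int keys and std::string outputs:
namespace order_by_sketch {
inline std::vector<std::string> SortByKey(
    std::vector<std::pair<int, std::string>> cache) {
  std::sort(cache.begin(), cache.end(),
            [](const auto &a, const auto &b) { return a.first < b.first; });
  std::vector<std::string> out;
  out.reserve(cache.size());
  for (auto &entry : cache) out.push_back(std::move(entry.second));
  return out;
}
}  // namespace order_by_sketch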
|
|
|
|
|
2017-04-24 16:16:53 +08:00
|
|
|
void OrderBy::OrderByCursor::Reset() {
|
|
|
|
input_cursor_->Reset();
|
|
|
|
did_pull_all_ = false;
|
|
|
|
cache_.clear();
|
|
|
|
cache_it_ = cache_.begin();
|
|
|
|
}
|
|
|
|
|
2017-11-09 20:46:37 +08:00
|
|
|
Merge::Merge(const std::shared_ptr<LogicalOperator> &input,
|
|
|
|
const std::shared_ptr<LogicalOperator> &merge_match,
|
|
|
|
const std::shared_ptr<LogicalOperator> &merge_create)
|
2017-04-25 21:22:21 +08:00
|
|
|
: input_(input ? input : std::make_shared<Once>()),
|
|
|
|
merge_match_(merge_match),
|
|
|
|
merge_create_(merge_create) {}
|
|
|
|
|
2017-05-16 15:16:46 +08:00
|
|
|
bool Merge::Accept(HierarchicalLogicalOperatorVisitor &visitor) {
|
2017-04-25 21:22:21 +08:00
|
|
|
if (visitor.PreVisit(*this)) {
|
2017-05-16 15:16:46 +08:00
|
|
|
input_->Accept(visitor) && merge_match_->Accept(visitor) &&
|
|
|
|
merge_create_->Accept(visitor);
|
2017-04-25 21:22:21 +08:00
|
|
|
}
|
2017-05-16 15:16:46 +08:00
|
|
|
return visitor.PostVisit(*this);
|
2017-04-25 21:22:21 +08:00
|
|
|
}
|
|
|
|
|
2018-01-12 22:17:04 +08:00
|
|
|
std::unique_ptr<Cursor> Merge::MakeCursor(database::GraphDbAccessor &db) const {
|
2017-04-25 21:22:21 +08:00
|
|
|
return std::make_unique<MergeCursor>(*this, db);
|
|
|
|
}
|
|
|
|
|
2018-02-20 18:08:43 +08:00
|
|
|
std::vector<Symbol> Merge::ModifiedSymbols(const SymbolTable &table) const {
|
|
|
|
auto symbols = input_->ModifiedSymbols(table);
|
2018-05-15 23:38:47 +08:00
|
|
|
// Match and create branches should have the same symbols, so just take one
|
|
|
|
// of them.
|
2018-02-20 18:08:43 +08:00
|
|
|
auto my_symbols = merge_match_->OutputSymbols(table);
|
|
|
|
symbols.insert(symbols.end(), my_symbols.begin(), my_symbols.end());
|
|
|
|
return symbols;
|
|
|
|
}
|
|
|
|
|
2018-01-12 22:17:04 +08:00
|
|
|
Merge::MergeCursor::MergeCursor(const Merge &self,
|
|
|
|
database::GraphDbAccessor &db)
|
2017-04-25 21:22:21 +08:00
|
|
|
: input_cursor_(self.input_->MakeCursor(db)),
|
|
|
|
merge_match_cursor_(self.merge_match_->MakeCursor(db)),
|
|
|
|
merge_create_cursor_(self.merge_create_->MakeCursor(db)) {}
|
|
|
|
|
2017-09-13 16:27:12 +08:00
|
|
|
bool Merge::MergeCursor::Pull(Frame &frame, Context &context) {
|
2017-04-27 16:48:21 +08:00
|
|
|
if (pull_input_) {
|
2017-09-13 16:27:12 +08:00
|
|
|
if (input_cursor_->Pull(frame, context)) {
|
2017-04-27 16:48:21 +08:00
|
|
|
// after a successful pull from the input
|
|
|
|
// reset merge_match (its expand iterators maintain state)
|
|
|
|
// and merge_create (could have a Once at the beginning)
|
|
|
|
merge_match_cursor_->Reset();
|
|
|
|
merge_create_cursor_->Reset();
|
|
|
|
} else
|
|
|
|
// input is exhausted, we're done
|
|
|
|
return false;
|
|
|
|
}
|
2017-04-25 21:22:21 +08:00
|
|
|
|
2017-04-27 16:48:21 +08:00
|
|
|
// pull from the merge_match cursor
|
2017-09-13 16:27:12 +08:00
|
|
|
if (merge_match_cursor_->Pull(frame, context)) {
|
2017-04-27 16:48:21 +08:00
|
|
|
// if successful, next Pull from this should not pull_input_
|
|
|
|
pull_input_ = false;
|
|
|
|
return true;
|
|
|
|
} else {
|
|
|
|
// failed to Pull from the merge_match cursor
|
|
|
|
if (pull_input_) {
|
|
|
|
// if we have just now pulled from the input
|
|
|
|
// and failed to pull from merge_match, we should create
|
2017-07-14 19:58:25 +08:00
|
|
|
__attribute__((unused)) bool merge_create_pull_result =
|
2017-09-13 16:27:12 +08:00
|
|
|
merge_create_cursor_->Pull(frame, context);
|
2017-10-11 19:19:10 +08:00
|
|
|
DCHECK(merge_create_pull_result) << "MergeCreate must never fail";
|
2017-04-25 21:22:21 +08:00
|
|
|
return true;
|
|
|
|
}
|
2017-10-05 17:25:52 +08:00
|
|
|
// we have exhausted merge_match_cursor_ after 1 or more successful
|
|
|
|
// Pulls
|
2017-04-27 16:48:21 +08:00
|
|
|
// attempt next input_cursor_ pull
|
|
|
|
pull_input_ = true;
|
2017-09-13 16:27:12 +08:00
|
|
|
return Pull(frame, context);
|
2017-04-25 21:22:21 +08:00
|
|
|
}
|
|
|
|
}
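// Editor's illustrative sketch, not part of the original operator code: per
// input row, MERGE first tries the match branch and only runs the create
// branch when the match branch yields nothing for a freshly pulled input row
// (in which case the create branch must succeed). The callables below are
// hypothetical stand-ins for the two cursors:
namespace merge_sketch {
template <class TryMatch, class Create>
bool MatchOrCreate(TryMatch &&try_match, Create &&create) {
  if (try_match()) return true;  // the pattern already exists, reuse it
  return create();               // otherwise create it; this must not fail
}
}  // namespace merge_sketch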
|
|
|
|
|
|
|
|
void Merge::MergeCursor::Reset() {
|
|
|
|
input_cursor_->Reset();
|
|
|
|
merge_match_cursor_->Reset();
|
|
|
|
merge_create_cursor_->Reset();
|
2017-04-27 16:48:21 +08:00
|
|
|
pull_input_ = true;
|
|
|
|
}
|
|
|
|
|
|
|
|
Optional::Optional(const std::shared_ptr<LogicalOperator> &input,
|
|
|
|
const std::shared_ptr<LogicalOperator> &optional,
|
|
|
|
const std::vector<Symbol> &optional_symbols)
|
|
|
|
: input_(input ? input : std::make_shared<Once>()),
|
|
|
|
optional_(optional),
|
|
|
|
optional_symbols_(optional_symbols) {}
|
|
|
|
|
2017-05-16 15:16:46 +08:00
|
|
|
bool Optional::Accept(HierarchicalLogicalOperatorVisitor &visitor) {
|
2017-04-27 16:48:21 +08:00
|
|
|
if (visitor.PreVisit(*this)) {
|
2017-05-16 15:16:46 +08:00
|
|
|
input_->Accept(visitor) && optional_->Accept(visitor);
|
2017-04-27 16:48:21 +08:00
|
|
|
}
|
2017-05-16 15:16:46 +08:00
|
|
|
return visitor.PostVisit(*this);
|
2017-04-27 16:48:21 +08:00
|
|
|
}
|
|
|
|
|
2018-01-12 22:17:04 +08:00
|
|
|
std::unique_ptr<Cursor> Optional::MakeCursor(
|
|
|
|
database::GraphDbAccessor &db) const {
|
2017-04-27 16:48:21 +08:00
|
|
|
return std::make_unique<OptionalCursor>(*this, db);
|
|
|
|
}
|
|
|
|
|
2018-02-20 18:08:43 +08:00
|
|
|
std::vector<Symbol> Optional::ModifiedSymbols(const SymbolTable &table) const {
|
|
|
|
auto symbols = input_->ModifiedSymbols(table);
|
|
|
|
auto my_symbols = optional_->ModifiedSymbols(table);
|
|
|
|
symbols.insert(symbols.end(), my_symbols.begin(), my_symbols.end());
|
|
|
|
return symbols;
|
|
|
|
}
|
|
|
|
|
2017-09-19 22:58:22 +08:00
|
|
|
Optional::OptionalCursor::OptionalCursor(const Optional &self,
|
2018-01-12 22:17:04 +08:00
|
|
|
database::GraphDbAccessor &db)
|
2017-04-27 16:48:21 +08:00
|
|
|
: self_(self),
|
|
|
|
input_cursor_(self.input_->MakeCursor(db)),
|
|
|
|
optional_cursor_(self.optional_->MakeCursor(db)) {}
|
|
|
|
|
2017-09-13 16:27:12 +08:00
|
|
|
bool Optional::OptionalCursor::Pull(Frame &frame, Context &context) {
|
2017-04-27 16:48:21 +08:00
|
|
|
if (pull_input_) {
|
2017-09-13 16:27:12 +08:00
|
|
|
if (input_cursor_->Pull(frame, context)) {
|
2017-04-27 16:48:21 +08:00
|
|
|
// after a successful pull from the input
|
|
|
|
// reset optional_ (its expand iterators maintain state)
|
|
|
|
optional_cursor_->Reset();
|
|
|
|
} else
|
|
|
|
// input is exhausted, we're done
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// pull from the optional_ cursor
|
2017-09-13 16:27:12 +08:00
|
|
|
if (optional_cursor_->Pull(frame, context)) {
|
2017-04-27 16:48:21 +08:00
|
|
|
// if successful, next Pull from this should not pull_input_
|
|
|
|
pull_input_ = false;
|
|
|
|
return true;
|
|
|
|
} else {
|
|
|
|
// failed to Pull from the optional_ cursor
|
|
|
|
if (pull_input_) {
|
|
|
|
// if we have just now pulled from the input and failed to pull from
|
|
|
|
// optional_, set the optional symbols to Null, ensure the input gets
|
|
|
|
// pulled next time, and return true
|
|
|
|
for (const Symbol &sym : self_.optional_symbols_)
|
|
|
|
frame[sym] = TypedValue::Null;
|
|
|
|
pull_input_ = true;
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
// we have exhausted optional_cursor_ after 1 or more successful Pulls
|
|
|
|
// attempt next input_cursor_ pull
|
|
|
|
pull_input_ = true;
|
2017-09-13 16:27:12 +08:00
|
|
|
return Pull(frame, context);
|
2017-04-27 16:48:21 +08:00
|
|
|
}
|
|
|
|
}
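// Editor's illustrative sketch, not part of the original operator code: the
// OPTIONAL MATCH rule is "if the optional branch produces no rows for an input
// row, still emit one row with the optional symbols set to Null". Here a
// std::pair<bool, int> with `first == false` stands in for a nulled frame
// slot:
namespace optional_sketch {
inline std::vector<std::pair<bool, int>> OptionalExpand(
    const std::vector<std::vector<int>> &matches_per_input_row) {
  std::vector<std::pair<bool, int>> out;
  for (const auto &matches : matches_per_input_row) {
    if (matches.empty())
      out.emplace_back(false, 0);  // no match: one row with the symbol nulled
    else
      for (int m : matches) out.emplace_back(true, m);
  }
  return out;
}
}  // namespace optional_sketch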
|
|
|
|
|
|
|
|
void Optional::OptionalCursor::Reset() {
|
|
|
|
input_cursor_->Reset();
|
|
|
|
optional_cursor_->Reset();
|
|
|
|
pull_input_ = true;
|
2017-04-25 21:22:21 +08:00
|
|
|
}
|
|
|
|
|
2017-04-28 21:14:28 +08:00
|
|
|
Unwind::Unwind(const std::shared_ptr<LogicalOperator> &input,
|
|
|
|
Expression *input_expression, Symbol output_symbol)
|
|
|
|
: input_(input ? input : std::make_shared<Once>()),
|
|
|
|
input_expression_(input_expression),
|
|
|
|
output_symbol_(output_symbol) {}
|
|
|
|
|
|
|
|
ACCEPT_WITH_INPUT(Unwind)
|
|
|
|
|
2018-01-12 22:17:04 +08:00
|
|
|
std::unique_ptr<Cursor> Unwind::MakeCursor(
|
|
|
|
database::GraphDbAccessor &db) const {
|
2017-04-28 21:14:28 +08:00
|
|
|
return std::make_unique<UnwindCursor>(*this, db);
|
|
|
|
}
|
|
|
|
|
2018-02-20 18:08:43 +08:00
|
|
|
std::vector<Symbol> Unwind::ModifiedSymbols(const SymbolTable &table) const {
|
|
|
|
auto symbols = input_->ModifiedSymbols(table);
|
|
|
|
symbols.emplace_back(output_symbol_);
|
|
|
|
return symbols;
|
|
|
|
}
|
|
|
|
|
2018-01-12 22:17:04 +08:00
|
|
|
Unwind::UnwindCursor::UnwindCursor(const Unwind &self,
|
|
|
|
database::GraphDbAccessor &db)
|
2017-04-28 21:14:28 +08:00
|
|
|
: self_(self), db_(db), input_cursor_(self.input_->MakeCursor(db)) {}
|
|
|
|
|
2017-09-13 16:27:12 +08:00
|
|
|
bool Unwind::UnwindCursor::Pull(Frame &frame, Context &context) {
|
2017-07-14 19:58:25 +08:00
|
|
|
if (db_.should_abort()) throw HintedAbortError();
|
2017-04-28 21:14:28 +08:00
|
|
|
// if we reached the end of our list of values
|
|
|
|
// pull from the input
|
|
|
|
if (input_value_it_ == input_value_.end()) {
|
2017-09-13 16:27:12 +08:00
|
|
|
if (!input_cursor_->Pull(frame, context)) return false;
|
2017-04-28 21:14:28 +08:00
|
|
|
|
|
|
|
// successful pull from input, initialize value and iterator
|
2018-06-27 17:08:21 +08:00
|
|
|
ExpressionEvaluator evaluator(frame, &context, GraphView::OLD);
|
2017-05-16 16:55:02 +08:00
|
|
|
TypedValue input_value = self_.input_expression_->Accept(evaluator);
|
2017-04-28 21:14:28 +08:00
|
|
|
if (input_value.type() != TypedValue::Type::List)
|
2017-06-12 21:12:31 +08:00
|
|
|
throw QueryRuntimeException("UNWIND only accepts list values, got '{}'",
|
|
|
|
input_value.type());
|
2017-04-28 21:14:28 +08:00
|
|
|
input_value_ = input_value.Value<std::vector<TypedValue>>();
|
|
|
|
input_value_it_ = input_value_.begin();
|
|
|
|
}
|
|
|
|
|
|
|
|
// if we reached the end of our list of values go back to the top
|
2017-09-13 16:27:12 +08:00
|
|
|
if (input_value_it_ == input_value_.end()) return Pull(frame, context);
|
2017-04-28 21:14:28 +08:00
|
|
|
|
|
|
|
frame[self_.output_symbol_] = *input_value_it_++;
|
|
|
|
return true;
|
|
|
|
}
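// Editor's illustrative sketch, not part of the original operator code:
// UNWIND turns every element of a per-row list into its own output row, and a
// row whose list is empty contributes nothing. The flattening, reduced to
// plain ints:
namespace unwind_sketch {
inline std::vector<int> Flatten(const std::vector<std::vector<int>> &lists) {
  std::vector<int> out;
  for (const auto &list : lists)
    for (int value : list) out.push_back(value);
  return out;  // e.g. Flatten({{1, 2}, {}, {3}}) == {1, 2, 3}
}
}  // namespace unwind_sketch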
|
|
|
|
|
|
|
|
void Unwind::UnwindCursor::Reset() {
|
|
|
|
input_cursor_->Reset();
|
|
|
|
input_value_.clear();
|
|
|
|
input_value_it_ = input_value_.end();
|
|
|
|
}
|
|
|
|
|
2017-05-03 18:22:16 +08:00
|
|
|
Distinct::Distinct(const std::shared_ptr<LogicalOperator> &input,
|
|
|
|
const std::vector<Symbol> &value_symbols)
|
|
|
|
: input_(input ? input : std::make_shared<Once>()),
|
|
|
|
value_symbols_(value_symbols) {}
|
|
|
|
|
|
|
|
ACCEPT_WITH_INPUT(Distinct)
|
|
|
|
|
2018-01-12 22:17:04 +08:00
|
|
|
std::unique_ptr<Cursor> Distinct::MakeCursor(
|
|
|
|
database::GraphDbAccessor &db) const {
|
2017-05-03 18:22:16 +08:00
|
|
|
return std::make_unique<DistinctCursor>(*this, db);
|
|
|
|
}
|
|
|
|
|
2017-09-19 22:58:22 +08:00
|
|
|
std::vector<Symbol> Distinct::OutputSymbols(
|
|
|
|
const SymbolTable &symbol_table) const {
|
2017-05-03 20:57:46 +08:00
|
|
|
// Propagate this to potential Produce.
|
|
|
|
return input_->OutputSymbols(symbol_table);
|
|
|
|
}
|
|
|
|
|
2018-02-20 18:08:43 +08:00
|
|
|
std::vector<Symbol> Distinct::ModifiedSymbols(const SymbolTable &table) const {
|
|
|
|
return input_->ModifiedSymbols(table);
|
|
|
|
}
|
|
|
|
|
2017-09-19 22:58:22 +08:00
|
|
|
Distinct::DistinctCursor::DistinctCursor(const Distinct &self,
|
2018-01-12 22:17:04 +08:00
|
|
|
database::GraphDbAccessor &db)
|
2017-05-03 18:22:16 +08:00
|
|
|
: self_(self), input_cursor_(self.input_->MakeCursor(db)) {}
|
|
|
|
|
2017-09-13 16:27:12 +08:00
|
|
|
bool Distinct::DistinctCursor::Pull(Frame &frame, Context &context) {
|
2017-05-03 18:22:16 +08:00
|
|
|
while (true) {
|
2017-09-13 16:27:12 +08:00
|
|
|
if (!input_cursor_->Pull(frame, context)) return false;
|
2017-05-03 18:22:16 +08:00
|
|
|
|
2017-09-12 16:08:58 +08:00
|
|
|
std::vector<TypedValue> row;
|
|
|
|
row.reserve(self_.value_symbols_.size());
|
2017-05-03 18:22:16 +08:00
|
|
|
for (const auto &symbol : self_.value_symbols_)
|
|
|
|
row.emplace_back(frame[symbol]);
|
|
|
|
if (seen_rows_.insert(std::move(row)).second) return true;
|
|
|
|
}
|
|
|
|
}
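// Editor's illustrative sketch, not part of the original operator code:
// DistinctCursor remembers every row it has already emitted and forwards a
// pulled row only when inserting it into the set adds a new element, i.e. when
// `insert(...).second` is true. The same idiom over plain ints:
namespace distinct_sketch {
inline std::vector<int> Distinct(const std::vector<int> &rows) {
  std::vector<int> out;
  std::unordered_set<int> seen;
  for (int row : rows)
    if (seen.insert(row).second) out.push_back(row);  // first occurrence only
  return out;  // e.g. Distinct({1, 2, 1, 3, 2}) == {1, 2, 3}
}
}  // namespace distinct_sketch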
|
|
|
|
|
|
|
|
void Distinct::DistinctCursor::Reset() {
|
|
|
|
input_cursor_->Reset();
|
|
|
|
seen_rows_.clear();
|
|
|
|
}
|
|
|
|
|
2018-01-16 17:09:15 +08:00
|
|
|
CreateIndex::CreateIndex(storage::Label label, storage::Property property)
|
2017-07-03 16:38:58 +08:00
|
|
|
: label_(label), property_(property) {}
|
|
|
|
|
|
|
|
bool CreateIndex::Accept(HierarchicalLogicalOperatorVisitor &visitor) {
|
|
|
|
return visitor.Visit(*this);
|
|
|
|
}
|
|
|
|
|
2018-02-23 22:06:31 +08:00
|
|
|
WITHOUT_SINGLE_INPUT(CreateIndex);
|
|
|
|
|
2017-07-03 16:38:58 +08:00
|
|
|
class CreateIndexCursor : public Cursor {
|
|
|
|
public:
|
2018-01-12 22:17:04 +08:00
|
|
|
CreateIndexCursor(const CreateIndex &self, database::GraphDbAccessor &db)
|
2017-07-03 16:38:58 +08:00
|
|
|
: self_(self), db_(db) {}
|
|
|
|
|
2017-10-10 00:09:28 +08:00
|
|
|
bool Pull(Frame &, Context &ctx) override {
|
2017-07-03 16:38:58 +08:00
|
|
|
if (did_create_) return false;
|
2017-10-10 00:09:28 +08:00
|
|
|
if (ctx.in_explicit_transaction_) {
|
|
|
|
throw IndexInMulticommandTxException();
|
|
|
|
}
|
2017-07-03 16:38:58 +08:00
|
|
|
try {
|
|
|
|
db_.BuildIndex(self_.label(), self_.property());
|
2018-01-12 22:17:04 +08:00
|
|
|
} catch (const database::IndexExistsException &) {
|
2017-07-03 16:38:58 +08:00
|
|
|
// Ignore creating an existing index.
|
|
|
|
}
|
2017-10-10 00:09:28 +08:00
|
|
|
ctx.is_index_created_ = did_create_ = true;
|
2017-07-03 16:38:58 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
void Reset() override { did_create_ = false; }
|
|
|
|
|
|
|
|
private:
|
|
|
|
const CreateIndex &self_;
|
2018-01-12 22:17:04 +08:00
|
|
|
database::GraphDbAccessor &db_;
|
2017-07-03 16:38:58 +08:00
|
|
|
bool did_create_ = false;
|
|
|
|
};
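// Editor's illustrative sketch, not part of the original operator code: the
// cursor above makes index creation idempotent by swallowing the
// "index already exists" error and still reporting success. The general
// try/ignore pattern, with the exception type passed in explicitly:
namespace create_index_sketch {
template <class AlreadyExistsError, class DoCreate>
bool CreateIfMissing(DoCreate &&do_create) {
  try {
    do_create();
  } catch (const AlreadyExistsError &) {
    // Creating an index that already exists is not an error for the caller.
  }
  return true;
}
// Hypothetical usage, mirroring the cursor above:
//   CreateIfMissing<database::IndexExistsException>(
//       [&] { db_.BuildIndex(self_.label(), self_.property()); });
}  // namespace create_index_sketch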
|
|
|
|
|
2018-01-12 22:17:04 +08:00
|
|
|
std::unique_ptr<Cursor> CreateIndex::MakeCursor(
|
|
|
|
database::GraphDbAccessor &db) const {
|
2017-07-03 16:38:58 +08:00
|
|
|
return std::make_unique<CreateIndexCursor>(*this, db);
|
|
|
|
}
|
|
|
|
|
2017-11-29 20:55:02 +08:00
|
|
|
Union::Union(const std::shared_ptr<LogicalOperator> &left_op,
|
|
|
|
const std::shared_ptr<LogicalOperator> &right_op,
|
|
|
|
const std::vector<Symbol> &union_symbols,
|
|
|
|
const std::vector<Symbol> &left_symbols,
|
|
|
|
const std::vector<Symbol> &right_symbols)
|
|
|
|
: left_op_(left_op),
|
|
|
|
right_op_(right_op),
|
|
|
|
union_symbols_(union_symbols),
|
|
|
|
left_symbols_(left_symbols),
|
|
|
|
right_symbols_(right_symbols) {}
|
|
|
|
|
2018-01-12 22:17:04 +08:00
|
|
|
std::unique_ptr<Cursor> Union::MakeCursor(database::GraphDbAccessor &db) const {
|
2017-11-29 20:55:02 +08:00
|
|
|
return std::make_unique<Union::UnionCursor>(*this, db);
|
|
|
|
}
|
|
|
|
|
|
|
|
bool Union::Accept(HierarchicalLogicalOperatorVisitor &visitor) {
|
|
|
|
if (visitor.PreVisit(*this)) {
|
|
|
|
if (left_op_->Accept(visitor)) {
|
|
|
|
right_op_->Accept(visitor);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return visitor.PostVisit(*this);
|
|
|
|
}
|
|
|
|
|
|
|
|
std::vector<Symbol> Union::OutputSymbols(const SymbolTable &) const {
|
|
|
|
return union_symbols_;
|
|
|
|
}
|
|
|
|
|
2018-02-20 18:08:43 +08:00
|
|
|
std::vector<Symbol> Union::ModifiedSymbols(const SymbolTable &) const {
|
|
|
|
return union_symbols_;
|
|
|
|
}
|
|
|
|
|
2018-02-23 22:06:31 +08:00
|
|
|
WITHOUT_SINGLE_INPUT(Union);
|
|
|
|
|
2018-01-12 22:17:04 +08:00
|
|
|
Union::UnionCursor::UnionCursor(const Union &self,
|
|
|
|
database::GraphDbAccessor &db)
|
2017-11-29 20:55:02 +08:00
|
|
|
: self_(self),
|
|
|
|
left_cursor_(self.left_op_->MakeCursor(db)),
|
|
|
|
right_cursor_(self.right_op_->MakeCursor(db)) {}
|
|
|
|
|
|
|
|
bool Union::UnionCursor::Pull(Frame &frame, Context &context) {
|
|
|
|
std::unordered_map<std::string, TypedValue> results;
|
|
|
|
if (left_cursor_->Pull(frame, context)) {
|
|
|
|
// collect values from the left child
|
|
|
|
for (const auto &output_symbol : self_.left_symbols_) {
|
|
|
|
results[output_symbol.name()] = frame[output_symbol];
|
|
|
|
}
|
|
|
|
} else if (right_cursor_->Pull(frame, context)) {
|
|
|
|
// collect values from the right child
|
|
|
|
for (const auto &output_symbol : self_.right_symbols_) {
|
|
|
|
results[output_symbol.name()] = frame[output_symbol];
|
|
|
|
}
|
|
|
|
} else {
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
// put collected values on frame under union symbols
|
|
|
|
for (const auto &symbol : self_.union_symbols_) {
|
|
|
|
frame[symbol] = results[symbol.name()];
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
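// Editor's illustrative sketch, not part of the original operator code: the
// UnionCursor drains the left branch, then the right one, and in both cases
// re-keys the produced values by symbol name so they land under the shared
// union symbols. The re-keying step in isolation, with strings for symbol
// names and ints for values:
namespace union_sketch {
inline std::vector<int> RekeyBySymbolName(
    const std::unordered_map<std::string, int> &branch_values,
    const std::vector<std::string> &union_symbol_names) {
  std::vector<int> out;
  out.reserve(union_symbol_names.size());
  for (const auto &name : union_symbol_names)
    out.push_back(branch_values.at(name));  // assumes every symbol is present
  return out;
}
}  // namespace union_sketch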
|
|
|
|
|
|
|
|
void Union::UnionCursor::Reset() {
|
|
|
|
left_cursor_->Reset();
|
|
|
|
right_cursor_->Reset();
|
|
|
|
}
|
|
|
|
|
2018-02-26 17:23:23 +08:00
|
|
|
bool PullRemote::Accept(HierarchicalLogicalOperatorVisitor &visitor) {
|
|
|
|
if (visitor.PreVisit(*this)) {
|
|
|
|
if (input_) input_->Accept(visitor);
|
|
|
|
}
|
|
|
|
return visitor.PostVisit(*this);
|
|
|
|
}
|
2018-01-22 20:59:18 +08:00
|
|
|
|
2018-02-20 18:08:43 +08:00
|
|
|
std::vector<Symbol> PullRemote::OutputSymbols(const SymbolTable &table) const {
|
|
|
|
return input_ ? input_->OutputSymbols(table) : std::vector<Symbol>{};
|
|
|
|
}
|
|
|
|
|
|
|
|
std::vector<Symbol> PullRemote::ModifiedSymbols(
|
|
|
|
const SymbolTable &table) const {
|
|
|
|
auto symbols = symbols_;
|
|
|
|
if (input_) {
|
|
|
|
auto input_symbols = input_->ModifiedSymbols(table);
|
|
|
|
symbols.insert(symbols.end(), input_symbols.begin(), input_symbols.end());
|
|
|
|
}
|
|
|
|
return symbols;
|
|
|
|
}
|
|
|
|
|
2018-02-19 22:07:02 +08:00
|
|
|
std::vector<Symbol> Synchronize::ModifiedSymbols(
|
|
|
|
const SymbolTable &table) const {
|
|
|
|
auto symbols = input_->ModifiedSymbols(table);
|
|
|
|
if (pull_remote_) {
|
|
|
|
auto pull_symbols = pull_remote_->ModifiedSymbols(table);
|
|
|
|
symbols.insert(symbols.end(), pull_symbols.begin(), pull_symbols.end());
|
|
|
|
}
|
|
|
|
return symbols;
|
2018-01-26 18:19:29 +08:00
|
|
|
}
|
|
|
|
|
2018-02-19 22:07:02 +08:00
|
|
|
bool Synchronize::Accept(HierarchicalLogicalOperatorVisitor &visitor) {
|
|
|
|
if (visitor.PreVisit(*this)) {
|
2018-06-19 19:52:11 +08:00
|
|
|
// pull_remote_ is optional here, so visit it only if we continue visiting
|
|
|
|
// and pull_remote_ does exist.
|
|
|
|
input_->Accept(visitor) && pull_remote_ && pull_remote_->Accept(visitor);
|
2018-02-19 22:07:02 +08:00
|
|
|
}
|
|
|
|
return visitor.PostVisit(*this);
|
|
|
|
}
|
2018-02-02 18:08:27 +08:00
|
|
|
|
2018-02-19 22:07:02 +08:00
|
|
|
std::vector<Symbol> Cartesian::ModifiedSymbols(const SymbolTable &table) const {
|
|
|
|
auto symbols = left_op_->ModifiedSymbols(table);
|
|
|
|
auto right = right_op_->ModifiedSymbols(table);
|
|
|
|
symbols.insert(symbols.end(), right.begin(), right.end());
|
|
|
|
return symbols;
|
|
|
|
}
|
|
|
|
|
|
|
|
bool Cartesian::Accept(HierarchicalLogicalOperatorVisitor &visitor) {
|
|
|
|
if (visitor.PreVisit(*this)) {
|
|
|
|
left_op_->Accept(visitor) && right_op_->Accept(visitor);
|
|
|
|
}
|
|
|
|
return visitor.PostVisit(*this);
|
|
|
|
}
|
|
|
|
|
2018-02-23 22:06:31 +08:00
|
|
|
WITHOUT_SINGLE_INPUT(Cartesian);
|
|
|
|
|
2018-02-19 22:07:02 +08:00
|
|
|
PullRemoteOrderBy::PullRemoteOrderBy(
|
|
|
|
const std::shared_ptr<LogicalOperator> &input, int64_t plan_id,
|
|
|
|
const std::vector<std::pair<Ordering, Expression *>> &order_by,
|
|
|
|
const std::vector<Symbol> &symbols)
|
|
|
|
: input_(input), plan_id_(plan_id), symbols_(symbols) {
|
|
|
|
CHECK(input_ != nullptr)
|
|
|
|
<< "PullRemoteOrderBy should always be constructed with input!";
|
|
|
|
std::vector<Ordering> ordering;
|
|
|
|
ordering.reserve(order_by.size());
|
|
|
|
order_by_.reserve(order_by.size());
|
|
|
|
for (const auto &ordering_expression_pair : order_by) {
|
|
|
|
ordering.emplace_back(ordering_expression_pair.first);
|
|
|
|
order_by_.emplace_back(ordering_expression_pair.second);
|
|
|
|
}
|
|
|
|
compare_ = TypedValueVectorCompare(ordering);
|
|
|
|
}
|
|
|
|
|
|
|
|
ACCEPT_WITH_INPUT(PullRemoteOrderBy);
|
|
|
|
|
|
|
|
std::vector<Symbol> PullRemoteOrderBy::OutputSymbols(
|
|
|
|
const SymbolTable &table) const {
|
|
|
|
return input_->OutputSymbols(table);
|
|
|
|
}
|
|
|
|
|
|
|
|
std::vector<Symbol> PullRemoteOrderBy::ModifiedSymbols(
|
|
|
|
const SymbolTable &table) const {
|
|
|
|
return input_->ModifiedSymbols(table);
|
|
|
|
}
|
|
|
|
|
|
|
|
namespace {
|
|
|
|
|
2018-04-30 15:33:09 +08:00
|
|
|
/** Helper class that wraps remote pulling for cursors that handle results from
|
|
|
|
* distributed workers.
|
|
|
|
*
|
|
|
|
* The command_id should be the command_id that was active when the cursor was
* initialized.
|
2018-02-19 22:07:02 +08:00
|
|
|
*/
|
|
|
|
class RemotePuller {
|
|
|
|
public:
|
2018-07-19 23:00:50 +08:00
|
|
|
RemotePuller(distributed::PullRpcClients *pull_clients,
|
|
|
|
database::GraphDbAccessor &db,
|
2018-04-30 15:33:09 +08:00
|
|
|
const std::vector<Symbol> &symbols, int64_t plan_id,
|
|
|
|
tx::CommandId command_id)
|
2018-07-19 23:00:50 +08:00
|
|
|
: pull_clients_(pull_clients),
|
|
|
|
db_(db),
|
|
|
|
symbols_(symbols),
|
|
|
|
plan_id_(plan_id),
|
|
|
|
command_id_(command_id) {
|
|
|
|
CHECK(pull_clients_);
|
|
|
|
worker_ids_ = pull_clients_->GetWorkerIds();
|
2018-02-19 22:07:02 +08:00
|
|
|
// Remove master from the worker ids list.
|
|
|
|
worker_ids_.erase(std::find(worker_ids_.begin(), worker_ids_.end(), 0));
|
|
|
|
}
|
|
|
|
|
|
|
|
void Initialize(Context &context) {
|
|
|
|
if (!remote_pulls_initialized_) {
|
2018-03-12 16:24:31 +08:00
|
|
|
VLOG(10) << "[RemotePuller] [" << context.db_accessor_.transaction_id()
|
2018-04-30 15:33:09 +08:00
|
|
|
<< "] [" << plan_id_ << "] [" << command_id_ << "] initialized";
|
2018-02-19 22:07:02 +08:00
|
|
|
for (auto &worker_id : worker_ids_) {
|
2018-03-23 22:21:46 +08:00
|
|
|
UpdatePullForWorker(worker_id, context);
|
2018-02-19 22:07:02 +08:00
|
|
|
}
|
|
|
|
remote_pulls_initialized_ = true;
|
2018-02-02 18:08:27 +08:00
|
|
|
}
|
2018-01-26 18:19:29 +08:00
|
|
|
}
|
|
|
|
|
2018-02-19 22:07:02 +08:00
|
|
|
void Update(Context &context) {
|
2018-02-02 18:08:27 +08:00
|
|
|
// If we don't have results for a worker, check if its remote pull
|
|
|
|
// finished and save results locally.
|
2018-02-19 22:07:02 +08:00
|
|
|
|
2018-03-12 16:24:31 +08:00
|
|
|
auto move_frames = [this, &context](int worker_id, auto remote_results) {
|
|
|
|
VLOG(10) << "[RemotePuller] [" << context.db_accessor_.transaction_id()
|
2018-04-30 15:33:09 +08:00
|
|
|
<< "] [" << plan_id_ << "] [" << command_id_
|
|
|
|
<< "] received results from " << worker_id;
|
2018-02-19 22:07:02 +08:00
|
|
|
remote_results_[worker_id] = std::move(remote_results.frames);
|
|
|
|
// Since we return and remove results from the back of the vector,
|
|
|
|
// reverse the results so the first to return is at the end of the
|
|
|
|
// vector.
|
|
|
|
std::reverse(remote_results_[worker_id].begin(),
|
|
|
|
remote_results_[worker_id].end());
|
|
|
|
};
|
|
|
|
|
2018-02-02 18:08:27 +08:00
|
|
|
for (auto &worker_id : worker_ids_) {
|
|
|
|
if (!remote_results_[worker_id].empty()) continue;
|
|
|
|
|
|
|
|
auto found_it = remote_pulls_.find(worker_id);
|
|
|
|
if (found_it == remote_pulls_.end()) continue;
|
2018-01-26 18:19:29 +08:00
|
|
|
|
2018-02-02 18:08:27 +08:00
|
|
|
auto &remote_pull = found_it->second;
|
2018-03-08 23:57:15 +08:00
|
|
|
if (!remote_pull.IsReady()) continue;
|
2018-02-02 18:08:27 +08:00
|
|
|
|
|
|
|
auto remote_results = remote_pull.get();
|
|
|
|
switch (remote_results.pull_state) {
|
2018-03-23 22:21:46 +08:00
|
|
|
case distributed::PullState::CURSOR_EXHAUSTED:
|
2018-03-12 16:24:31 +08:00
|
|
|
VLOG(10) << "[RemotePuller] ["
|
|
|
|
<< context.db_accessor_.transaction_id() << "] [" << plan_id_
|
2018-04-30 15:33:09 +08:00
|
|
|
<< "] [" << command_id_ << "] cursor exhausted from "
|
|
|
|
<< worker_id;
|
2018-02-19 22:07:02 +08:00
|
|
|
move_frames(worker_id, remote_results);
|
2018-02-02 18:08:27 +08:00
|
|
|
remote_pulls_.erase(found_it);
|
|
|
|
break;
|
2018-03-23 22:21:46 +08:00
|
|
|
case distributed::PullState::CURSOR_IN_PROGRESS:
|
2018-03-12 16:24:31 +08:00
|
|
|
VLOG(10) << "[RemotePuller] ["
|
|
|
|
<< context.db_accessor_.transaction_id() << "] [" << plan_id_
|
2018-04-30 15:33:09 +08:00
|
|
|
<< "] [" << command_id_ << "] cursor in progress from "
|
|
|
|
<< worker_id;
|
2018-02-19 22:07:02 +08:00
|
|
|
move_frames(worker_id, remote_results);
|
2018-03-23 22:21:46 +08:00
|
|
|
UpdatePullForWorker(worker_id, context);
|
2018-02-02 18:08:27 +08:00
|
|
|
break;
|
2018-03-23 22:21:46 +08:00
|
|
|
case distributed::PullState::SERIALIZATION_ERROR:
|
2018-02-02 18:08:27 +08:00
|
|
|
throw mvcc::SerializationError(
|
|
|
|
"Serialization error occured during PullRemote !");
|
2018-03-23 22:21:46 +08:00
|
|
|
case distributed::PullState::LOCK_TIMEOUT_ERROR:
|
2018-05-30 19:00:25 +08:00
|
|
|
throw utils::LockTimeoutException(
|
2018-02-08 20:27:07 +08:00
|
|
|
"LockTimeout error occured during PullRemote !");
|
2018-03-23 22:21:46 +08:00
|
|
|
case distributed::PullState::UPDATE_DELETED_ERROR:
|
2018-02-14 16:44:48 +08:00
|
|
|
throw QueryRuntimeException(
|
|
|
|
"RecordDeleted error ocured during PullRemote !");
|
2018-03-23 22:21:46 +08:00
|
|
|
case distributed::PullState::RECONSTRUCTION_ERROR:
|
2018-02-08 20:27:07 +08:00
|
|
|
throw query::ReconstructionException();
|
2018-03-23 22:21:46 +08:00
|
|
|
case distributed::PullState::UNABLE_TO_DELETE_VERTEX_ERROR:
|
2018-03-01 18:03:54 +08:00
|
|
|
throw RemoveAttachedVertexException();
|
2018-03-23 22:21:46 +08:00
|
|
|
case distributed::PullState::HINTED_ABORT_ERROR:
|
2018-03-01 18:03:54 +08:00
|
|
|
throw HintedAbortError();
|
2018-03-23 22:21:46 +08:00
|
|
|
case distributed::PullState::QUERY_ERROR:
|
2018-02-08 20:27:07 +08:00
|
|
|
throw QueryRuntimeException(
|
2018-07-06 21:12:45 +08:00
|
|
|
"Query runtime error occurred during PullRemote !");
|
2018-01-26 18:19:29 +08:00
|
|
|
}
|
2018-02-02 18:08:27 +08:00
|
|
|
}
|
2018-02-19 22:07:02 +08:00
|
|
|
}
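// Editor's illustrative sketch, not part of the original operator code: the
// `move_frames` lambda above reverses each received batch so that results can
// later be handed out cheaply from the back of the vector with pop_back(),
// i.e. the vector acts as a stack that still yields rows in arrival order:
namespace remote_buffer_sketch {
// Store a batch reversed, so back() is always the oldest unreturned row.
inline void StoreBatch(std::vector<int> batch, std::vector<int> *buffer) {
  std::reverse(batch.begin(), batch.end());
  *buffer = std::move(batch);
}
inline int PopNext(std::vector<int> *buffer) {
  int next = buffer->back();
  buffer->pop_back();
  return next;  // successive calls yield the batch in its original order
}
}  // namespace remote_buffer_sketch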
|
2018-01-26 18:19:29 +08:00
|
|
|
|
2018-07-06 21:12:45 +08:00
|
|
|
void Reset() {
|
2018-07-19 23:00:50 +08:00
|
|
|
worker_ids_ = pull_clients_->GetWorkerIds();
|
2018-07-06 21:12:45 +08:00
|
|
|
// Remove master from the worker ids list.
|
|
|
|
worker_ids_.erase(std::find(worker_ids_.begin(), worker_ids_.end(), 0));
|
|
|
|
|
|
|
|
// We must clear remote_pulls before resetting cursors to make sure that all
|
|
|
|
// outstanding remote pulls are done. Otherwise we might try to reset a cursor
|
|
|
|
// during its pull.
|
|
|
|
remote_pulls_.clear();
|
|
|
|
for (auto &worker_id : worker_ids_) {
|
2018-07-19 23:00:50 +08:00
|
|
|
pull_clients_->ResetCursor(&db_, worker_id, plan_id_, command_id_);
|
2018-07-06 21:12:45 +08:00
|
|
|
}
|
|
|
|
remote_results_.clear();
|
|
|
|
remote_pulls_initialized_ = false;
|
|
|
|
}
|
|
|
|
|
2018-02-19 22:07:02 +08:00
|
|
|
auto Workers() { return worker_ids_; }
|
2018-02-02 18:08:27 +08:00
|
|
|
|
2018-02-19 22:07:02 +08:00
|
|
|
int GetWorkerId(int worker_id_index) { return worker_ids_[worker_id_index]; }
|
2018-02-02 18:08:27 +08:00
|
|
|
|
2018-02-19 22:07:02 +08:00
|
|
|
size_t WorkerCount() { return worker_ids_.size(); }
|
2018-02-08 20:57:03 +08:00
|
|
|
|
2018-02-19 22:07:02 +08:00
|
|
|
void ClearWorkers() { worker_ids_.clear(); }
|
2018-02-14 22:20:28 +08:00
|
|
|
|
2018-02-19 22:07:02 +08:00
|
|
|
bool HasPendingPulls() { return !remote_pulls_.empty(); }
|
|
|
|
|
|
|
|
bool HasPendingPullFromWorker(int worker_id) {
|
|
|
|
return remote_pulls_.find(worker_id) != remote_pulls_.end();
|
2018-01-26 18:19:29 +08:00
|
|
|
}
|
|
|
|
|
2018-02-19 22:07:02 +08:00
|
|
|
bool HasResultsFromWorker(int worker_id) {
|
|
|
|
return !remote_results_[worker_id].empty();
|
2018-02-02 18:08:27 +08:00
|
|
|
}
|
|
|
|
|
2018-02-19 22:07:02 +08:00
|
|
|
std::vector<query::TypedValue> PopResultFromWorker(int worker_id) {
|
|
|
|
auto result = remote_results_[worker_id].back();
|
|
|
|
remote_results_[worker_id].pop_back();
|
|
|
|
|
|
|
|
// Remove the worker if we exhausted all locally stored results and there
|
|
|
|
// are no more pending remote pulls for that worker.
|
|
|
|
if (remote_results_[worker_id].empty() &&
|
|
|
|
remote_pulls_.find(worker_id) == remote_pulls_.end()) {
|
|
|
|
worker_ids_.erase(
|
|
|
|
std::find(worker_ids_.begin(), worker_ids_.end(), worker_id));
|
2018-02-02 18:08:27 +08:00
|
|
|
}
|
2018-02-19 22:07:02 +08:00
|
|
|
|
|
|
|
return result;
|
2018-01-26 18:19:29 +08:00
|
|
|
}
|
|
|
|
|
2018-02-19 22:07:02 +08:00
|
|
|
private:
|
2018-07-19 23:00:50 +08:00
|
|
|
distributed::PullRpcClients *pull_clients_{nullptr};
|
2018-02-19 22:07:02 +08:00
|
|
|
database::GraphDbAccessor &db_;
|
|
|
|
std::vector<Symbol> symbols_;
|
|
|
|
int64_t plan_id_;
|
2018-04-30 15:33:09 +08:00
|
|
|
tx::CommandId command_id_;
|
2018-03-23 22:21:46 +08:00
|
|
|
std::unordered_map<int, utils::Future<distributed::PullData>> remote_pulls_;
|
2018-02-19 22:07:02 +08:00
|
|
|
std::unordered_map<int, std::vector<std::vector<query::TypedValue>>>
|
|
|
|
remote_results_;
|
|
|
|
std::vector<int> worker_ids_;
|
|
|
|
bool remote_pulls_initialized_ = false;
|
|
|
|
|
2018-03-23 22:21:46 +08:00
|
|
|
void UpdatePullForWorker(int worker_id, Context &context) {
|
2018-07-19 23:00:50 +08:00
|
|
|
remote_pulls_[worker_id] = pull_clients_->Pull(
|
2018-07-06 21:12:45 +08:00
|
|
|
&db_, worker_id, plan_id_, command_id_, context.parameters_, symbols_,
|
2018-06-27 17:08:21 +08:00
|
|
|
context.timestamp_, false);
|
2018-01-26 18:19:29 +08:00
|
|
|
}
|
2018-02-19 22:07:02 +08:00
|
|
|
};
|
2018-01-26 18:19:29 +08:00
|
|
|
|
2018-02-19 22:07:02 +08:00
|
|
|
class PullRemoteCursor : public Cursor {
|
|
|
|
public:
|
|
|
|
PullRemoteCursor(const PullRemote &self, database::GraphDbAccessor &db)
|
|
|
|
: self_(self),
|
|
|
|
input_cursor_(self.input() ? self.input()->MakeCursor(db) : nullptr),
|
2018-04-30 15:33:09 +08:00
|
|
|
command_id_(db.transaction().cid()),
|
|
|
|
remote_puller_(
|
2018-07-19 23:00:50 +08:00
|
|
|
// TODO: Pass in a Master GraphDb.
|
|
|
|
&dynamic_cast<database::Master *>(&db.db())->pull_clients(), db,
|
|
|
|
self.symbols(), self.plan_id(), command_id_) {}
|
2018-01-26 18:19:29 +08:00
|
|
|
|
2018-02-19 22:07:02 +08:00
|
|
|
bool Pull(Frame &frame, Context &context) override {
|
2018-03-01 22:06:00 +08:00
|
|
|
if (context.db_accessor_.should_abort()) throw HintedAbortError();
|
2018-02-19 22:07:02 +08:00
|
|
|
remote_puller_.Initialize(context);
|
|
|
|
|
|
|
|
bool have_remote_results = false;
|
|
|
|
while (!have_remote_results && remote_puller_.WorkerCount() > 0) {
|
2018-03-01 22:06:00 +08:00
|
|
|
if (context.db_accessor_.should_abort()) throw HintedAbortError();
|
2018-02-19 22:07:02 +08:00
|
|
|
remote_puller_.Update(context);
|
|
|
|
|
|
|
|
// Get locally stored results from workers in a round-robin fasion.
|
|
|
|
int num_workers = remote_puller_.WorkerCount();
|
|
|
|
for (int i = 0; i < num_workers; ++i) {
|
|
|
|
int worker_id_index =
|
|
|
|
(last_pulled_worker_id_index_ + i + 1) % num_workers;
|
|
|
|
int worker_id = remote_puller_.GetWorkerId(worker_id_index);
|
|
|
|
|
|
|
|
if (remote_puller_.HasResultsFromWorker(worker_id)) {
|
|
|
|
last_pulled_worker_id_index_ = worker_id_index;
|
|
|
|
have_remote_results = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2018-01-26 18:19:29 +08:00
|
|
|
|
2018-02-19 22:07:02 +08:00
|
|
|
if (!have_remote_results) {
|
|
|
|
if (!remote_puller_.HasPendingPulls()) {
|
|
|
|
remote_puller_.ClearWorkers();
|
|
|
|
break;
|
|
|
|
}
|
2018-01-22 20:59:18 +08:00
|
|
|
|
2018-02-19 22:07:02 +08:00
|
|
|
// If there are no remote results available, try to pull and return
|
|
|
|
// local results.
|
|
|
|
if (input_cursor_ && input_cursor_->Pull(frame, context)) {
|
2018-03-12 16:24:31 +08:00
|
|
|
VLOG(10) << "[PullRemoteCursor] ["
|
|
|
|
<< context.db_accessor_.transaction_id() << "] ["
|
2018-04-30 15:33:09 +08:00
|
|
|
<< self_.plan_id() << "] [" << command_id_
|
|
|
|
<< "] producing local results ";
|
2018-02-19 22:07:02 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
2018-03-12 16:24:31 +08:00
|
|
|
VLOG(10) << "[PullRemoteCursor] ["
|
|
|
|
<< context.db_accessor_.transaction_id() << "] ["
|
2018-04-30 15:33:09 +08:00
|
|
|
<< self_.plan_id() << "] [" << command_id_
|
|
|
|
<< "] no results available, sleeping ";
|
2018-02-19 22:07:02 +08:00
|
|
|
// If there aren't any local/remote results available, sleep.
|
|
|
|
std::this_thread::sleep_for(
|
2018-02-28 20:59:12 +08:00
|
|
|
std::chrono::microseconds(FLAGS_remote_pull_sleep_micros));
|
2018-02-19 22:07:02 +08:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
// No more remote results, make sure local results get exhausted.
|
|
|
|
if (!have_remote_results) {
|
|
|
|
if (input_cursor_ && input_cursor_->Pull(frame, context)) {
|
2018-03-12 16:24:31 +08:00
|
|
|
VLOG(10) << "[PullRemoteCursor] ["
|
|
|
|
<< context.db_accessor_.transaction_id() << "] ["
|
2018-04-30 15:33:09 +08:00
|
|
|
<< self_.plan_id() << "] [" << command_id_
|
|
|
|
<< "] producing local results ";
|
2018-02-19 22:07:02 +08:00
|
|
|
return true;
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
{
|
|
|
|
int worker_id = remote_puller_.GetWorkerId(last_pulled_worker_id_index_);
|
2018-03-12 16:24:31 +08:00
|
|
|
VLOG(10) << "[PullRemoteCursor] ["
|
|
|
|
<< context.db_accessor_.transaction_id() << "] ["
|
2018-04-30 15:33:09 +08:00
|
|
|
<< self_.plan_id() << "] [" << command_id_
|
|
|
|
<< "] producing results from worker " << worker_id;
|
2018-02-19 22:07:02 +08:00
|
|
|
auto result = remote_puller_.PopResultFromWorker(worker_id);
|
|
|
|
for (size_t i = 0; i < self_.symbols().size(); ++i) {
|
|
|
|
frame[self_.symbols()[i]] = std::move(result[i]);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
2018-02-02 17:19:19 +08:00
|
|
|
}
|
|
|
|
|
2018-02-19 22:07:02 +08:00
|
|
|
void Reset() override {
|
2018-07-06 21:12:45 +08:00
|
|
|
if (input_cursor_) input_cursor_->Reset();
|
|
|
|
remote_puller_.Reset();
|
|
|
|
last_pulled_worker_id_index_ = 0;
|
2018-02-20 18:08:43 +08:00
|
|
|
}
|
|
|
|
|
2018-02-19 22:07:02 +08:00
|
|
|
private:
|
|
|
|
const PullRemote &self_;
|
|
|
|
const std::unique_ptr<Cursor> input_cursor_;
|
2018-04-30 15:33:09 +08:00
|
|
|
tx::CommandId command_id_;
|
2018-02-19 22:07:02 +08:00
|
|
|
RemotePuller remote_puller_;
|
|
|
|
int last_pulled_worker_id_index_ = 0;
|
|
|
|
};
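// Illustration only (not used by the operator above): a minimal sketch of the
// round-robin selection PullRemoteCursor::Pull performs over buffered worker
// results. The helper name and the `has_results` callback are hypothetical;
// the real cursor queries RemotePuller directly.
//
//   int PickWorkerIndex(int last_index, int num_workers,
//                       const std::function<bool(int)> &has_results) {
//     for (int i = 0; i < num_workers; ++i) {
//       int candidate = (last_index + i + 1) % num_workers;
//       if (has_results(candidate)) return candidate;
//     }
//     return -1;  // No worker currently has buffered results.
//   }
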
// Synchronizes the master and the workers: InitialPull accumulates local and
// remote results and applies deferred updates everywhere, after which the
// accumulated local results are yielded, followed by the remote ones.
class SynchronizeCursor : public Cursor {
 public:
  SynchronizeCursor(const Synchronize &self, database::GraphDbAccessor &db)
      : self_(self),
        pull_clients_(
            // TODO: Pass in a Master GraphDb.
            &dynamic_cast<database::Master *>(&db.db())->pull_clients()),
        updates_clients_(
            // TODO: Pass in a Master GraphDb.
            &dynamic_cast<database::Master *>(&db.db())->updates_clients()),
        updates_server_(
            // TODO: Pass in a Master GraphDb.
            &dynamic_cast<database::Master *>(&db.db())->updates_server()),
        input_cursor_(self.input()->MakeCursor(db)),
        pull_remote_cursor_(
            self.pull_remote() ? self.pull_remote()->MakeCursor(db) : nullptr),
        command_id_(db.transaction().cid()),
        master_id_(
            // TODO: Pass in a Master GraphDb.
            dynamic_cast<database::Master *>(&db.db())->WorkerId()) {}

  bool Pull(Frame &frame, Context &context) override {
    if (!initial_pull_done_) {
      InitialPull(frame, context);
      initial_pull_done_ = true;
    }
    // Yield local stuff while available.
    if (!local_frames_.empty()) {
      VLOG(10) << "[SynchronizeCursor] ["
               << context.db_accessor_.transaction_id()
               << "] producing local results";
      auto &result = local_frames_.back();
      for (size_t i = 0; i < frame.elems().size(); ++i) {
        if (self_.advance_command()) {
          query::ReconstructTypedValue(result[i]);
        }
        frame.elems()[i] = std::move(result[i]);
      }
      local_frames_.resize(local_frames_.size() - 1);
      return true;
    }

    // We're out of local stuff, yield from pull_remote if available.
    if (pull_remote_cursor_ && pull_remote_cursor_->Pull(frame, context)) {
      VLOG(10) << "[SynchronizeCursor] ["
               << context.db_accessor_.transaction_id()
               << "] producing remote results";
      return true;
    }

    return false;
  }

  void Reset() override {
    input_cursor_->Reset();
    pull_remote_cursor_->Reset();
    initial_pull_done_ = false;
    local_frames_.clear();
  }

 private:
  const Synchronize &self_;
  distributed::PullRpcClients *pull_clients_{nullptr};
  distributed::UpdatesRpcClients *updates_clients_{nullptr};
  distributed::UpdatesRpcServer *updates_server_{nullptr};
  const std::unique_ptr<Cursor> input_cursor_;
  const std::unique_ptr<Cursor> pull_remote_cursor_;
  bool initial_pull_done_{false};
  std::vector<std::vector<TypedValue>> local_frames_;
  tx::CommandId command_id_;
  int master_id_;

  void InitialPull(Frame &frame, Context &context) {
    VLOG(10) << "[SynchronizeCursor] [" << context.db_accessor_.transaction_id()
             << "] initial pull";

    // Tell all workers to accumulate, only if there is a remote pull.
    std::vector<utils::Future<distributed::PullData>> worker_accumulations;
    if (pull_remote_cursor_) {
      for (auto worker_id : pull_clients_->GetWorkerIds()) {
        if (worker_id == master_id_) continue;
        worker_accumulations.emplace_back(pull_clients_->Pull(
            &context.db_accessor_, worker_id, self_.pull_remote()->plan_id(),
            command_id_, context.parameters_, self_.pull_remote()->symbols(),
            context.timestamp_, true, 0));
      }
    }

    // Accumulate local results.
    while (input_cursor_->Pull(frame, context)) {
      local_frames_.emplace_back();
      auto &local_frame = local_frames_.back();
      local_frame.reserve(frame.elems().size());
      for (auto &elem : frame.elems()) {
        local_frame.emplace_back(std::move(elem));
      }
    }

    // Wait for all workers to finish accumulation (first sync point).
    for (auto &accu : worker_accumulations) {
      switch (accu.get().pull_state) {
        case distributed::PullState::CURSOR_EXHAUSTED:
          continue;
        case distributed::PullState::CURSOR_IN_PROGRESS:
          throw QueryRuntimeException(
              "Expected exhausted cursor after remote pull accumulate");
        case distributed::PullState::SERIALIZATION_ERROR:
          throw mvcc::SerializationError(
              "Failed to perform remote accumulate due to "
              "SerializationError");
        case distributed::PullState::UPDATE_DELETED_ERROR:
          throw QueryRuntimeException(
              "Failed to perform remote accumulate due to "
              "RecordDeletedError");
        case distributed::PullState::LOCK_TIMEOUT_ERROR:
          throw utils::LockTimeoutException(
              "Failed to perform remote accumulate due to "
              "LockTimeoutException");
        case distributed::PullState::RECONSTRUCTION_ERROR:
          throw QueryRuntimeException(
              "Failed to perform remote accumulate due to "
              "ReconstructionError");
        case distributed::PullState::UNABLE_TO_DELETE_VERTEX_ERROR:
          throw RemoveAttachedVertexException();
        case distributed::PullState::HINTED_ABORT_ERROR:
          throw HintedAbortError();
        case distributed::PullState::QUERY_ERROR:
          throw QueryRuntimeException(
              "Failed to perform remote accumulate due to Query runtime "
              "error");
      }
    }

    if (self_.advance_command()) {
      context.db_accessor_.AdvanceCommand();
    }

    // Make all the workers apply their deltas.
    auto tx_id = context.db_accessor_.transaction_id();
    auto apply_futures = updates_clients_->UpdateApplyAll(master_id_, tx_id);
    updates_server_->Apply(tx_id);
    for (auto &future : apply_futures) {
      switch (future.get()) {
        case distributed::UpdateResult::SERIALIZATION_ERROR:
          throw mvcc::SerializationError(
              "Failed to apply deferred updates due to SerializationError");
        case distributed::UpdateResult::UNABLE_TO_DELETE_VERTEX_ERROR:
          throw RemoveAttachedVertexException();
        case distributed::UpdateResult::UPDATE_DELETED_ERROR:
          throw QueryRuntimeException(
              "Failed to apply deferred updates due to RecordDeletedError");
        case distributed::UpdateResult::LOCK_TIMEOUT_ERROR:
          throw utils::LockTimeoutException(
              "Failed to apply deferred update due to LockTimeoutException");
        case distributed::UpdateResult::DONE:
          break;
      }
    }

    // If the command advanced, let the workers know.
    if (self_.advance_command()) {
      auto futures = pull_clients_->NotifyAllTransactionCommandAdvanced(tx_id);
      for (auto &future : futures) future.wait();
    }
  }
};

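// The synchronization steps performed by SynchronizeCursor::InitialPull, as a
// summary of the code above:
//   1. If there is a remote pull, ask every worker except the master to
//      accumulate its results (the pull_clients_->Pull calls guarded by
//      `if (pull_remote_cursor_)`).
//   2. Accumulate the local input into local_frames_.
//   3. Wait on worker_accumulations (first sync point) and translate any
//      error state into the corresponding exception.
//   4. Optionally advance the command, then make every worker apply its
//      deferred updates (UpdateApplyAll) and apply the master's own updates
//      (updates_server_->Apply).
//   5. If the command advanced, notify all workers before any results are
//      yielded from Pull.
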
// Produces the cartesian product of its two inputs by materializing every
// frame of the left operand and replaying them for each frame pulled from the
// right operand.
class CartesianCursor : public Cursor {
 public:
  CartesianCursor(const Cartesian &self, database::GraphDbAccessor &db)
      : self_(self),
        left_op_cursor_(self.left_op()->MakeCursor(db)),
        right_op_cursor_(self_.right_op()->MakeCursor(db)) {
    CHECK(left_op_cursor_ != nullptr)
        << "CartesianCursor: Missing left operator cursor.";
    CHECK(right_op_cursor_ != nullptr)
        << "CartesianCursor: Missing right operator cursor.";
  }

  bool Pull(Frame &frame, Context &context) override {
    auto copy_frame = [&frame]() {
      std::vector<TypedValue> result;
      for (auto &elem : frame.elems()) {
        result.emplace_back(std::move(elem));
      }
      return result;
    };

    if (!cartesian_pull_initialized_) {
      // Pull all left_op frames.
      while (left_op_cursor_->Pull(frame, context)) {
        left_op_frames_.emplace_back(copy_frame());
      }

      // We're setting the iterator to 'end' here so the first iteration pulls
      // the right cursor.
      left_op_frames_it_ = left_op_frames_.end();
      cartesian_pull_initialized_ = true;
    }

    // If the left operator yielded zero results there is no cartesian product.
    if (left_op_frames_.empty()) {
      return false;
    }

    auto restore_frame = [&frame](const std::vector<Symbol> &symbols,
                                  const std::vector<TypedValue> &restore_from) {
      for (const auto &symbol : symbols) {
        frame[symbol] = restore_from[symbol.position()];
      }
    };

    if (left_op_frames_it_ == left_op_frames_.end()) {
      // Advance right_op_cursor_.
      if (!right_op_cursor_->Pull(frame, context)) return false;

      right_op_frame_ = copy_frame();
      left_op_frames_it_ = left_op_frames_.begin();
    } else {
      // Make sure the results last pulled from right_op_cursor_ are on the
      // frame.
      restore_frame(self_.right_symbols(), right_op_frame_);
    }

    restore_frame(self_.left_symbols(), *left_op_frames_it_);
    left_op_frames_it_++;
    return true;
  }

  void Reset() override {
    left_op_cursor_->Reset();
    right_op_cursor_->Reset();
    right_op_frame_.clear();
    left_op_frames_.clear();
    left_op_frames_it_ = left_op_frames_.end();
    cartesian_pull_initialized_ = false;
  }

 private:
  const Cartesian &self_;
  std::vector<std::vector<TypedValue>> left_op_frames_;
  std::vector<TypedValue> right_op_frame_;
  const std::unique_ptr<Cursor> left_op_cursor_;
  const std::unique_ptr<Cursor> right_op_cursor_;
  std::vector<std::vector<TypedValue>>::iterator left_op_frames_it_;
  bool cartesian_pull_initialized_{false};
};

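// Illustration only: the iteration pattern CartesianCursor implements, shown
// on plain vectors. The left side is materialized once and replayed for every
// row produced by the right side (the function and names are hypothetical).
//
//   template <class T>
//   std::vector<std::pair<T, T>> CartesianProduct(const std::vector<T> &left,
//                                                 const std::vector<T> &right) {
//     std::vector<std::pair<T, T>> result;
//     for (const auto &r : right) {   // lazily pulled side
//       for (const auto &l : left) {  // materialized side
//         result.emplace_back(l, r);
//       }
//     }
//     return result;
//   }
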
// Merges the already ordered result streams of the master and the workers,
// repeatedly emitting the smallest candidate according to the operator's
// comparator.
class PullRemoteOrderByCursor : public Cursor {
 public:
  PullRemoteOrderByCursor(const PullRemoteOrderBy &self,
                          database::GraphDbAccessor &db)
      : self_(self),
        input_(self.input()->MakeCursor(db)),
        command_id_(db.transaction().cid()),
        remote_puller_(
            // TODO: Pass in a Master GraphDb.
            &dynamic_cast<database::Master *>(&db.db())->pull_clients(), db,
            self.symbols(), self.plan_id(), command_id_) {}

  bool Pull(Frame &frame, Context &context) {
    if (context.db_accessor_.should_abort()) throw HintedAbortError();
    ExpressionEvaluator evaluator(frame, &context, GraphView::OLD);

    auto evaluate_result = [this, &evaluator]() {
      std::vector<TypedValue> order_by;
      order_by.reserve(self_.order_by().size());
      for (auto expression_ptr : self_.order_by()) {
        order_by.emplace_back(expression_ptr->Accept(evaluator));
      }
      return order_by;
    };

    auto restore_frame = [&frame,
                          this](const std::vector<TypedValue> &restore_from) {
      for (size_t i = 0; i < restore_from.size(); ++i) {
        frame[self_.symbols()[i]] = restore_from[i];
      }
    };

    if (!merge_initialized_) {
      VLOG(10) << "[PullRemoteOrderBy] ["
               << context.db_accessor_.transaction_id() << "] ["
               << self_.plan_id() << "] [" << command_id_ << "] initialize";
      remote_puller_.Initialize(context);
      missing_results_from_ = remote_puller_.Workers();
      missing_master_result_ = true;
      merge_initialized_ = true;
    }

    if (missing_master_result_) {
      if (input_->Pull(frame, context)) {
        std::vector<TypedValue> output;
        output.reserve(self_.symbols().size());
        for (const Symbol &symbol : self_.symbols()) {
          output.emplace_back(frame[symbol]);
        }

        merge_.push_back(MergeResultItem{std::experimental::nullopt, output,
                                         evaluate_result()});
      }
      missing_master_result_ = false;
    }

    while (!missing_results_from_.empty()) {
      if (context.db_accessor_.should_abort()) throw HintedAbortError();
      remote_puller_.Update(context);

      bool has_all_result = true;
      for (auto &worker_id : missing_results_from_) {
        if (!remote_puller_.HasResultsFromWorker(worker_id) &&
            remote_puller_.HasPendingPullFromWorker(worker_id)) {
          has_all_result = false;
          break;
        }
      }

      if (!has_all_result) {
        VLOG(10) << "[PullRemoteOrderByCursor] ["
                 << context.db_accessor_.transaction_id() << "] ["
                 << self_.plan_id() << "] [" << command_id_
                 << "] missing results, sleep";
        // If we don't have results from all workers, sleep before continuing.
        std::this_thread::sleep_for(
            std::chrono::microseconds(FLAGS_remote_pull_sleep_micros));
        continue;
      }

      for (auto &worker_id : missing_results_from_) {
        // It is possible that a worker's remote pull finished but didn't
        // return any results. In that case, just skip it.
        if (!remote_puller_.HasResultsFromWorker(worker_id)) continue;
        auto remote_result = remote_puller_.PopResultFromWorker(worker_id);
        restore_frame(remote_result);
        merge_.push_back(
            MergeResultItem{worker_id, remote_result, evaluate_result()});
      }

      missing_results_from_.clear();
    }

    if (merge_.empty()) return false;

    auto result_it = std::min_element(
        merge_.begin(), merge_.end(), [this](const auto &lhs, const auto &rhs) {
          return self_.compare()(lhs.order_by, rhs.order_by);
        });

    restore_frame(result_it->remote_result);

    if (result_it->worker_id) {
      VLOG(10) << "[PullRemoteOrderByCursor] ["
               << context.db_accessor_.transaction_id() << "] ["
               << self_.plan_id() << "] [" << command_id_
               << "] producing results from worker "
               << result_it->worker_id.value();
      missing_results_from_.push_back(result_it->worker_id.value());
    } else {
      VLOG(10) << "[PullRemoteOrderByCursor] ["
               << context.db_accessor_.transaction_id() << "] ["
               << self_.plan_id() << "] [" << command_id_
               << "] producing local results";
      missing_master_result_ = true;
    }

    merge_.erase(result_it);
    return true;
  }

  void Reset() {
    input_->Reset();
    remote_puller_.Reset();
    merge_.clear();
    missing_results_from_.clear();
    missing_master_result_ = false;
    merge_initialized_ = false;
  }

 private:
  struct MergeResultItem {
    std::experimental::optional<int> worker_id;
    std::vector<TypedValue> remote_result;
    std::vector<TypedValue> order_by;
  };

  const PullRemoteOrderBy &self_;
  std::unique_ptr<Cursor> input_;
  tx::CommandId command_id_;
  RemotePuller remote_puller_;
  std::vector<MergeResultItem> merge_;
  std::vector<int> missing_results_from_;
  bool missing_master_result_ = false;
  bool merge_initialized_ = false;
};

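// Illustration only: the k-way merge step at the end of
// PullRemoteOrderByCursor::Pull, reduced to its essence. Each source (the
// master or a worker) keeps one candidate row in `merge_`; the smallest one
// according to the operator's comparator is emitted and its source is then
// marked as needing a refill. A minimal sketch with hypothetical names:
//
//   template <class Item, class Less>
//   Item PopSmallest(std::vector<Item> &candidates, Less less) {
//     auto it = std::min_element(candidates.begin(), candidates.end(), less);
//     Item item = *it;       // emit this row
//     candidates.erase(it);  // its source must provide the next candidate
//     return item;
//   }
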
}  // namespace

std::unique_ptr<Cursor> PullRemote::MakeCursor(
    database::GraphDbAccessor &db) const {
  return std::make_unique<PullRemoteCursor>(*this, db);
}

std::unique_ptr<Cursor> Synchronize::MakeCursor(
    database::GraphDbAccessor &db) const {
  return std::make_unique<SynchronizeCursor>(*this, db);
}

std::unique_ptr<Cursor> Cartesian::MakeCursor(
    database::GraphDbAccessor &db) const {
  return std::make_unique<CartesianCursor>(*this, db);
}

std::unique_ptr<Cursor> PullRemoteOrderBy::MakeCursor(
    database::GraphDbAccessor &db) const {
  return std::make_unique<PullRemoteOrderByCursor>(*this, db);
}

ModifyUser::ModifyUser(std::string username, Expression *password,
                       bool is_create)
    : username_(std::move(username)),
      password_(password),
      is_create_(is_create) {}

bool ModifyUser::Accept(HierarchicalLogicalOperatorVisitor &visitor) {
  return visitor.Visit(*this);
}

WITHOUT_SINGLE_INPUT(ModifyUser)

class ModifyUserCursor : public Cursor {
 public:
  bool Pull(Frame &frame, Context &context) override {
    if (context.in_explicit_transaction_) {
      throw UserModificationInMulticommandTxException();
    }
    ExpressionEvaluator evaluator(frame, &context, GraphView::OLD);
    throw utils::NotYetImplemented("user auth");
  }

  void Reset() override { throw utils::NotYetImplemented("user auth"); }
};

std::unique_ptr<Cursor> ModifyUser::MakeCursor(
    database::GraphDbAccessor &db) const {
  return std::make_unique<ModifyUserCursor>();
}

bool DropUser::Accept(HierarchicalLogicalOperatorVisitor &visitor) {
  return visitor.Visit(*this);
}

WITHOUT_SINGLE_INPUT(DropUser)

class DropUserCursor : public Cursor {
 public:
  DropUserCursor() {}

  bool Pull(Frame &, Context &ctx) override {
    if (ctx.in_explicit_transaction_) {
      throw UserModificationInMulticommandTxException();
    }
    throw utils::NotYetImplemented("user auth");
  }

  void Reset() override { throw utils::NotYetImplemented("user auth"); }
};

std::unique_ptr<Cursor> DropUser::MakeCursor(
    database::GraphDbAccessor &db) const {
  return std::make_unique<DropUserCursor>();
}

CreateStream::CreateStream(std::string stream_name, Expression *stream_uri,
                           Expression *stream_topic, Expression *transform_uri,
                           Expression *batch_interval_in_ms,
                           Expression *batch_size)
    : stream_name_(std::move(stream_name)),
      stream_uri_(stream_uri),
      stream_topic_(stream_topic),
      transform_uri_(transform_uri),
      batch_interval_in_ms_(batch_interval_in_ms),
      batch_size_(batch_size) {}

WITHOUT_SINGLE_INPUT(CreateStream)

class CreateStreamCursor : public Cursor {
  using StreamInfo = integrations::kafka::StreamInfo;

 public:
  CreateStreamCursor(const CreateStream &self, database::GraphDbAccessor &)
      : self_(self) {}

  bool Pull(Frame &frame, Context &ctx) override {
    if (ctx.in_explicit_transaction_) {
      throw StreamClauseInMulticommandTxException();
    }
    ExpressionEvaluator evaluator(frame, &ctx, GraphView::OLD);

    TypedValue stream_uri = self_.stream_uri()->Accept(evaluator);
    TypedValue stream_topic = self_.stream_topic()->Accept(evaluator);
    TypedValue transform_uri = self_.transform_uri()->Accept(evaluator);

    std::experimental::optional<int64_t> batch_interval_in_ms, batch_size;

    if (self_.batch_interval_in_ms()) {
      batch_interval_in_ms =
          self_.batch_interval_in_ms()->Accept(evaluator).Value<int64_t>();
    }
    if (self_.batch_size()) {
      batch_size = self_.batch_size()->Accept(evaluator).Value<int64_t>();
    }

    try {
      StreamInfo info;
      info.stream_name = self_.stream_name();
      info.stream_uri = stream_uri.Value<std::string>();
      info.stream_topic = stream_topic.Value<std::string>();
      info.transform_uri = transform_uri.Value<std::string>();
      info.batch_interval_in_ms = batch_interval_in_ms;
      info.batch_size = batch_size;

      ctx.kafka_streams_->Create(info);
    } catch (const integrations::kafka::KafkaStreamException &e) {
      throw QueryRuntimeException(e.what());
    }

    return false;
  }

  void Reset() override { throw utils::NotYetImplemented("Create Stream"); }

 private:
  const CreateStream &self_;
};

std::unique_ptr<Cursor> CreateStream::MakeCursor(
    database::GraphDbAccessor &db) const {
  return std::make_unique<CreateStreamCursor>(*this, db);
}

DropStream::DropStream(std::string stream_name)
    : stream_name_(std::move(stream_name)) {}

WITHOUT_SINGLE_INPUT(DropStream)

class DropStreamCursor : public Cursor {
 public:
  DropStreamCursor(const DropStream &self, database::GraphDbAccessor &)
      : self_(self) {}

  bool Pull(Frame &frame, Context &ctx) override {
    if (ctx.in_explicit_transaction_) {
      throw StreamClauseInMulticommandTxException();
    }

    try {
      ctx.kafka_streams_->Drop(self_.stream_name());
    } catch (const integrations::kafka::KafkaStreamException &e) {
      throw QueryRuntimeException(e.what());
    }
    return false;
  }

  void Reset() override { throw utils::NotYetImplemented("Drop Stream"); }

 private:
  const DropStream &self_;
};

std::unique_ptr<Cursor> DropStream::MakeCursor(
    database::GraphDbAccessor &db) const {
  return std::make_unique<DropStreamCursor>(*this, db);
}

ShowStreams::ShowStreams(Symbol name_symbol, Symbol uri_symbol,
                         Symbol topic_symbol, Symbol transform_symbol,
                         Symbol status_symbol)
    : name_symbol_(name_symbol),
      uri_symbol_(uri_symbol),
      topic_symbol_(topic_symbol),
      transform_symbol_(transform_symbol),
      status_symbol_(status_symbol) {}

WITHOUT_SINGLE_INPUT(ShowStreams)

std::vector<Symbol> ShowStreams::OutputSymbols(const SymbolTable &) const {
  return {name_symbol_, uri_symbol_, topic_symbol_, transform_symbol_,
          status_symbol_};
}

class ShowStreamsCursor : public Cursor {
 public:
  ShowStreamsCursor(const ShowStreams &self, database::GraphDbAccessor &)
      : self_(self) {}

  bool Pull(Frame &frame, Context &ctx) override {
    if (ctx.in_explicit_transaction_) {
      throw StreamClauseInMulticommandTxException();
    }

    if (!is_initialized_) {
      streams_ = ctx.kafka_streams_->Show();
      streams_it_ = streams_.begin();
      is_initialized_ = true;
    }

    if (streams_it_ == streams_.end()) return false;

    frame[self_.name_symbol()] = streams_it_->stream_name;
    frame[self_.uri_symbol()] = streams_it_->stream_uri;
    frame[self_.topic_symbol()] = streams_it_->stream_topic;
    frame[self_.transform_symbol()] = streams_it_->transform_uri;
    frame[self_.status_symbol()] = streams_it_->is_running;

    streams_it_++;

    return true;
  }

  void Reset() override { throw utils::NotYetImplemented("Show Streams"); }

 private:
  const ShowStreams &self_;

  bool is_initialized_ = false;
  using StreamInfo = integrations::kafka::StreamInfo;
  std::vector<StreamInfo> streams_;
  std::vector<StreamInfo>::iterator streams_it_ = streams_.begin();
};

std::unique_ptr<Cursor> ShowStreams::MakeCursor(
    database::GraphDbAccessor &db) const {
  return std::make_unique<ShowStreamsCursor>(*this, db);
}

StartStopStream::StartStopStream(std::string stream_name, bool is_start,
                                 Expression *limit_batches)
    : stream_name_(stream_name),
      is_start_(is_start),
      limit_batches_(limit_batches) {}

WITHOUT_SINGLE_INPUT(StartStopStream)

class StartStopStreamCursor : public Cursor {
 public:
  StartStopStreamCursor(const StartStopStream &self,
                        database::GraphDbAccessor &)
      : self_(self) {}

  bool Pull(Frame &frame, Context &ctx) override {
    if (ctx.in_explicit_transaction_) {
      throw StreamClauseInMulticommandTxException();
    }

    ExpressionEvaluator evaluator(frame, &ctx, GraphView::OLD);
    std::experimental::optional<int64_t> limit_batches;

    if (self_.limit_batches()) {
      limit_batches = self_.limit_batches()->Accept(evaluator).Value<int64_t>();
    }

    try {
      if (self_.is_start()) {
        ctx.kafka_streams_->Start(self_.stream_name(), limit_batches);
      } else {
        ctx.kafka_streams_->Stop(self_.stream_name());
      }
    } catch (const integrations::kafka::KafkaStreamException &e) {
      throw QueryRuntimeException(e.what());
    }

    return false;
  }

  void Reset() override { throw utils::NotYetImplemented("Start/Stop Stream"); }

 private:
  const StartStopStream &self_;
};

std::unique_ptr<Cursor> StartStopStream::MakeCursor(
    database::GraphDbAccessor &db) const {
  return std::make_unique<StartStopStreamCursor>(*this, db);
}

StartStopAllStreams::StartStopAllStreams(bool is_start) : is_start_(is_start) {}

WITHOUT_SINGLE_INPUT(StartStopAllStreams)

class StartStopAllStreamsCursor : public Cursor {
 public:
  StartStopAllStreamsCursor(const StartStopAllStreams &self,
                            database::GraphDbAccessor &)
      : self_(self) {}

  bool Pull(Frame &frame, Context &ctx) override {
    if (ctx.in_explicit_transaction_) {
      throw StreamClauseInMulticommandTxException();
    }

    try {
      if (self_.is_start()) {
        ctx.kafka_streams_->StartAll();
      } else {
        ctx.kafka_streams_->StopAll();
      }
    } catch (const integrations::kafka::KafkaStreamException &e) {
      throw QueryRuntimeException(e.what());
    }

    return false;
  }

  void Reset() override {
    throw utils::NotYetImplemented("Start/Stop All Streams");
  }

 private:
  const StartStopAllStreams &self_;
};

std::unique_ptr<Cursor> StartStopAllStreams::MakeCursor(
    database::GraphDbAccessor &db) const {
  return std::make_unique<StartStopAllStreamsCursor>(*this, db);
}

TestStream::TestStream(std::string stream_name, Expression *limit_batches,
                       Symbol test_result_symbol)
    : stream_name_(stream_name),
      limit_batches_(limit_batches),
      test_result_symbol_(test_result_symbol) {}

WITHOUT_SINGLE_INPUT(TestStream)

std::vector<Symbol> TestStream::OutputSymbols(const SymbolTable &) const {
  return {test_result_symbol_};
}

class TestStreamCursor : public Cursor {
 public:
  TestStreamCursor(const TestStream &self, database::GraphDbAccessor &)
      : self_(self) {}

  bool Pull(Frame &frame, Context &ctx) override {
    if (ctx.in_explicit_transaction_) {
      throw StreamClauseInMulticommandTxException();
    }

    if (!is_initialized_) {
      ExpressionEvaluator evaluator(frame, &ctx, GraphView::OLD);
      std::experimental::optional<int64_t> limit_batches;

      if (self_.limit_batches()) {
        limit_batches =
            self_.limit_batches()->Accept(evaluator).Value<int64_t>();
      }

      try {
        results_ = ctx.kafka_streams_->Test(self_.stream_name(), limit_batches);
      } catch (const integrations::kafka::KafkaStreamException &e) {
        throw QueryRuntimeException(e.what());
      }
      results_it_ = results_.begin();
      is_initialized_ = true;
    }

    if (results_it_ == results_.end()) return false;

    frame[self_.test_result_symbol()] = *results_it_;
    results_it_++;

    return true;
  }

  void Reset() override { throw utils::NotYetImplemented("Test Stream"); }

 private:
  const TestStream &self_;

  bool is_initialized_ = false;
  std::vector<std::string> results_;
  std::vector<std::string>::iterator results_it_ = results_.begin();
};

std::unique_ptr<Cursor> TestStream::MakeCursor(
    database::GraphDbAccessor &db) const {
  return std::make_unique<TestStreamCursor>(*this, db);
}

}  // namespace query::plan

BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::Once);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::CreateNode);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::CreateExpand);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::ScanAll);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::ScanAllByLabel);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::ScanAllByLabelPropertyRange);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::ScanAllByLabelPropertyValue);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::Expand);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::ExpandVariable);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::Filter);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::Produce);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::ConstructNamedPath);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::Delete);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::SetProperty);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::SetProperties);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::SetLabels);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::RemoveProperty);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::RemoveLabels);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::ExpandUniquenessFilter<EdgeAccessor>);
BOOST_CLASS_EXPORT_IMPLEMENT(
    query::plan::ExpandUniquenessFilter<VertexAccessor>);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::Accumulate);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::Aggregate);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::Skip);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::Limit);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::OrderBy);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::Merge);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::Optional);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::Unwind);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::Distinct);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::CreateIndex);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::Union);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::PullRemote);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::Synchronize);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::Cartesian);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::PullRemoteOrderBy);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::ModifyUser);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::DropUser);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::CreateStream);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::DropStream);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::ShowStreams);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::StartStopStream);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::StartStopAllStreams);
BOOST_CLASS_EXPORT_IMPLEMENT(query::plan::TestStream);