2018-08-29 22:09:07 +08:00
|
|
|
/// @file
|
2017-02-04 16:01:15 +08:00
|
|
|
#pragma once
|
|
|
|
|
2018-01-12 22:17:04 +08:00
|
|
|
#include <atomic>
#include <memory>
#include <string>
#include <unordered_map>
#include <vector>

#include "database/counters.hpp"
#include "database/storage.hpp"
#include "database/storage_gc.hpp"
#include "durability/recovery.hpp"
#include "durability/wal.hpp"
#include "io/network/endpoint.hpp"
#include "storage/concurrent_id_mapper.hpp"
#include "storage/types.hpp"
#include "transactions/engine.hpp"
#include "utils/scheduler.hpp"
|
2017-02-04 16:01:15 +08:00
|
|
|
|
2018-01-12 22:17:04 +08:00
|
|
|
namespace database {
|
|
|
|
|
|
|
|
/// Database configuration. Initialized from flags, but modifiable.
|
|
|
|
struct Config {
|
|
|
|
Config();
|
2018-05-10 22:33:41 +08:00
|
|
|
|
2018-01-12 22:17:04 +08:00
|
|
|
// Durability flags.
|
|
|
|
bool durability_enabled;
|
|
|
|
std::string durability_directory;
|
|
|
|
bool db_recover_on_startup;
|
|
|
|
int snapshot_cycle_sec;
|
|
|
|
int snapshot_max_retained;
|
|
|
|
int snapshot_on_exit;
|
2018-08-24 16:43:27 +08:00
|
|
|
bool synchronous_commit;
|
2018-01-12 22:17:04 +08:00
|
|
|
|
|
|
|
// Misc flags.
|
|
|
|
int gc_cycle_sec;
|
|
|
|
int query_execution_time_sec;
|
|
|
|
|
2018-05-16 18:00:56 +08:00
|
|
|
// set of properties which will be stored on disk
|
|
|
|
std::vector<std::string> properties_on_disk;
|
|
|
|
|
2018-01-12 22:17:04 +08:00
|
|
|
// Distributed master/worker flags.
|
2018-05-29 17:32:21 +08:00
|
|
|
bool dynamic_graph_partitioner_enabled{false};
|
2018-09-27 21:07:46 +08:00
|
|
|
int rpc_num_client_workers{0};
|
|
|
|
int rpc_num_server_workers{0};
|
2018-04-20 20:58:49 +08:00
|
|
|
int worker_id{0};
|
|
|
|
io::network::Endpoint master_endpoint{"0.0.0.0", 0};
|
|
|
|
io::network::Endpoint worker_endpoint{"0.0.0.0", 0};
|
2018-05-09 19:26:22 +08:00
|
|
|
int recovering_cluster_size{0};
|
2018-01-12 22:17:04 +08:00
|
|
|
};
|
|
|
|
|
2018-07-26 15:08:21 +08:00
|
|
|
// Forward declaration: this header refers to GraphDbAccessor only through
// std::unique_ptr and references.
class GraphDbAccessor;
|
|
|
|
|
2018-08-29 22:09:07 +08:00
|
|
|
/// An abstract base class providing the interface for a graph database.
|
2018-07-26 15:08:21 +08:00
|
|
|
///
|
|
|
|
/// Always be sure that GraphDb object is destructed before main exits, i.e.
|
|
|
|
/// GraphDb object shouldn't be part of global/static variable, except if its
|
|
|
|
/// destructor is explicitly called before main exits. Consider code:
|
|
|
|
///
|
|
|
|
/// GraphDb db; // KeyIndex is created as a part of database::Storage
|
|
|
|
/// int main() {
|
|
|
|
/// GraphDbAccessor dba(db);
|
|
|
|
/// auto v = dba.InsertVertex();
|
|
|
|
/// v.add_label(dba.Label(
|
|
|
|
/// "Start")); // New SkipList is created in KeyIndex for LabelIndex.
|
|
|
|
/// // That SkipList creates SkipListGc which
|
|
|
|
/// // initialises static Executor object.
|
|
|
|
/// return 0;
|
|
|
|
/// }
|
|
|
|
///
|
|
|
|
/// After main exits: 1. Executor is destructed, 2. KeyIndex is destructed.
|
|
|
|
/// Destructor of KeyIndex calls delete on created SkipLists which destroy
|
|
|
|
/// SkipListGc that tries to use Executor object that doesn't exist anymore.
|
|
|
|
/// -> CRASH
|
2017-06-21 17:29:13 +08:00
|
|
|
class GraphDb {
|
2017-02-18 18:54:37 +08:00
|
|
|
public:
|
2018-01-19 21:49:58 +08:00
|
|
|
GraphDb() {}
|
Split GraphDb to distributed and single node files
Summary:
This change, hopefully, simplifies the implementation of different kinds
of GraphDb. The pimpl idiom is now simplified by removing all of the
crazy inheritance. Implementations classes are just plain data stores,
without any methods. The interface classes now have a more flat
hierarchy:
```
GraphDb (pure interface)
|
+----+---------- DistributedGraphDb (pure interface)
| |
Single Node +-----+------+
| |
Master Worker
```
DistributedGraphDb is used as an intermediate interface for all the
things that should work only in distributed. Therefore, virtual calls
for distributed stuff have been removed from GraphDb. Some are exposed
via DistributedGraphDb, other's are only in concrete Master and Worker
classes. The code which relied on those virtual calls has been
refactored to either use DistributedGraphDb, take a pointer to what is
actually needed or use dynamic_cast. Obviously, dynamic_cast is a
temporary solution and should be replaced with another mechanism (e.g.
virtual call, or some other function pointer style).
The cost of the above change is some code duplication in constructors
and destructors of classes. This duplication has a lot of little tweaks
that make it hard to generalize, not to mention that virtual calls do
not work in constructor and destructor. If we really care about
generalizing this, we should think about abandoning RAII in favor of
constructor + Init method.
The next steps for splitting the dependencies that seem logical are:
1) Split GraphDbAccessor implementation, either via inheritance or
passing in an implementation pointer. GraphDbAccessor should then
only be created by a virtual call on GraphDb.
2) Split Interpreter implementation. Besides allowing single node
interpreter to exist without depending on distributed, this will
enable the planner and operators to be correctly separated.
Reviewers: msantl, mferencevic, ipaljak
Reviewed By: msantl
Subscribers: dgleich, pullbot
Differential Revision: https://phabricator.memgraph.io/D1493
2018-07-19 23:00:50 +08:00
|
|
|
GraphDb(const GraphDb &) = delete;
|
|
|
|
GraphDb(GraphDb &&) = delete;
|
|
|
|
GraphDb &operator=(const GraphDb &) = delete;
|
|
|
|
GraphDb &operator=(GraphDb &&) = delete;
|
|
|
|
|
2018-01-19 21:49:58 +08:00
|
|
|
virtual ~GraphDb() {}
|
|
|
|
|
2018-07-26 15:08:21 +08:00
|
|
|
/// Create a new accessor by starting a new transaction.
|
|
|
|
virtual std::unique_ptr<GraphDbAccessor> Access() = 0;
|
|
|
|
/// Create an accessor for a running transaction.
|
|
|
|
virtual std::unique_ptr<GraphDbAccessor> Access(tx::TransactionId) = 0;
|
|
|
|
|
2018-01-19 21:49:58 +08:00
|
|
|
virtual Storage &storage() = 0;
|
|
|
|
virtual durability::WriteAheadLog &wal() = 0;
|
|
|
|
virtual tx::Engine &tx_engine() = 0;
|
|
|
|
virtual storage::ConcurrentIdMapper<storage::Label> &label_mapper() = 0;
|
|
|
|
virtual storage::ConcurrentIdMapper<storage::EdgeType>
|
|
|
|
&edge_type_mapper() = 0;
|
|
|
|
virtual storage::ConcurrentIdMapper<storage::Property> &property_mapper() = 0;
|
|
|
|
virtual database::Counters &counters() = 0;
|
|
|
|
virtual void CollectGarbage() = 0;
|
|
|
|
|
2018-07-26 15:08:21 +08:00
|
|
|
/// Makes a snapshot from the visibility of the given accessor
|
2018-04-06 15:59:54 +08:00
|
|
|
virtual bool MakeSnapshot(GraphDbAccessor &accessor) = 0;
|
|
|
|
|
2018-07-26 15:08:21 +08:00
|
|
|
/// Releases the storage object safely and creates a new object.
|
|
|
|
/// This is needed because of recovery, otherwise we might try to recover into
|
|
|
|
/// a storage which has already been polluted because of a failed previous
|
|
|
|
/// recovery
|
2018-04-20 20:16:54 +08:00
|
|
|
virtual void ReinitializeStorage() = 0;
|
|
|
|
|
2018-07-26 15:08:21 +08:00
|
|
|
/// When this is false, no new transactions should be created.
|
Split GraphDb to distributed and single node files
Summary:
This change, hopefully, simplifies the implementation of different kinds
of GraphDb. The pimpl idiom is now simplified by removing all of the
crazy inheritance. Implementations classes are just plain data stores,
without any methods. The interface classes now have a more flat
hierarchy:
```
GraphDb (pure interface)
|
+----+---------- DistributedGraphDb (pure interface)
| |
Single Node +-----+------+
| |
Master Worker
```
DistributedGraphDb is used as an intermediate interface for all the
things that should work only in distributed. Therefore, virtual calls
for distributed stuff have been removed from GraphDb. Some are exposed
via DistributedGraphDb, other's are only in concrete Master and Worker
classes. The code which relied on those virtual calls has been
refactored to either use DistributedGraphDb, take a pointer to what is
actually needed or use dynamic_cast. Obviously, dynamic_cast is a
temporary solution and should be replaced with another mechanism (e.g.
virtual call, or some other function pointer style).
The cost of the above change is some code duplication in constructors
and destructors of classes. This duplication has a lot of little tweaks
that make it hard to generalize, not to mention that virtual calls do
not work in constructor and destructor. If we really care about
generalizing this, we should think about abandoning RAII in favor of
constructor + Init method.
The next steps for splitting the dependencies that seem logical are:
1) Split GraphDbAccessor implementation, either via inheritance or
passing in an implementation pointer. GraphDbAccessor should then
only be created by a virtual call on GraphDb.
2) Split Interpreter implementation. Besides allowing single node
interpreter to exist without depending on distributed, this will
enable the planner and operators to be correctly separated.
Reviewers: msantl, mferencevic, ipaljak
Reviewed By: msantl
Subscribers: dgleich, pullbot
Differential Revision: https://phabricator.memgraph.io/D1493
2018-07-19 23:00:50 +08:00
|
|
|
bool is_accepting_transactions() const { return is_accepting_transactions_; }
|
|
|
|
|
|
|
|
protected:
|
|
|
|
std::atomic<bool> is_accepting_transactions_{true};
|
2018-01-19 21:49:58 +08:00
|
|
|
};
|
|
|
|
|
|
|
|
namespace impl {

// Forward declaration only. database::SingleNode owns an instance of this
// type through a std::unique_ptr member.
class SingleNode;

}  // namespace impl
|
|
|
|
|
|
|
|
class SingleNode final : public GraphDb {
|
2018-01-19 21:49:58 +08:00
|
|
|
public:
|
Split GraphDb to distributed and single node files
Summary:
This change, hopefully, simplifies the implementation of different kinds
of GraphDb. The pimpl idiom is now simplified by removing all of the
crazy inheritance. Implementations classes are just plain data stores,
without any methods. The interface classes now have a more flat
hierarchy:
```
GraphDb (pure interface)
|
+----+---------- DistributedGraphDb (pure interface)
| |
Single Node +-----+------+
| |
Master Worker
```
DistributedGraphDb is used as an intermediate interface for all the
things that should work only in distributed. Therefore, virtual calls
for distributed stuff have been removed from GraphDb. Some are exposed
via DistributedGraphDb, other's are only in concrete Master and Worker
classes. The code which relied on those virtual calls has been
refactored to either use DistributedGraphDb, take a pointer to what is
actually needed or use dynamic_cast. Obviously, dynamic_cast is a
temporary solution and should be replaced with another mechanism (e.g.
virtual call, or some other function pointer style).
The cost of the above change is some code duplication in constructors
and destructors of classes. This duplication has a lot of little tweaks
that make it hard to generalize, not to mention that virtual calls do
not work in constructor and destructor. If we really care about
generalizing this, we should think about abandoning RAII in favor of
constructor + Init method.
The next steps for splitting the dependencies that seem logical are:
1) Split GraphDbAccessor implementation, either via inheritance or
passing in an implementation pointer. GraphDbAccessor should then
only be created by a virtual call on GraphDb.
2) Split Interpreter implementation. Besides allowing single node
interpreter to exist without depending on distributed, this will
enable the planner and operators to be correctly separated.
Reviewers: msantl, mferencevic, ipaljak
Reviewed By: msantl
Subscribers: dgleich, pullbot
Differential Revision: https://phabricator.memgraph.io/D1493
2018-07-19 23:00:50 +08:00
|
|
|
explicit SingleNode(Config config = Config());
|
|
|
|
~SingleNode();
|
|
|
|
|
2018-07-26 15:08:21 +08:00
|
|
|
std::unique_ptr<GraphDbAccessor> Access() override;
|
|
|
|
std::unique_ptr<GraphDbAccessor> Access(tx::TransactionId) override;
|
|
|
|
|
2018-01-19 21:49:58 +08:00
|
|
|
Storage &storage() override;
|
|
|
|
durability::WriteAheadLog &wal() override;
|
|
|
|
tx::Engine &tx_engine() override;
|
|
|
|
storage::ConcurrentIdMapper<storage::Label> &label_mapper() override;
|
|
|
|
storage::ConcurrentIdMapper<storage::EdgeType> &edge_type_mapper() override;
|
|
|
|
storage::ConcurrentIdMapper<storage::Property> &property_mapper() override;
|
|
|
|
database::Counters &counters() override;
|
|
|
|
void CollectGarbage() override;
|
2017-10-30 17:43:25 +08:00
|
|
|
|
2018-04-06 15:59:54 +08:00
|
|
|
bool MakeSnapshot(GraphDbAccessor &accessor) override;
|
2018-04-20 20:16:54 +08:00
|
|
|
void ReinitializeStorage() override;
|
2018-03-01 18:03:54 +08:00
|
|
|
|
2017-10-30 17:43:25 +08:00
|
|
|
private:
|
Split GraphDb to distributed and single node files
Summary:
This change, hopefully, simplifies the implementation of different kinds
of GraphDb. The pimpl idiom is now simplified by removing all of the
crazy inheritance. Implementations classes are just plain data stores,
without any methods. The interface classes now have a more flat
hierarchy:
```
GraphDb (pure interface)
|
+----+---------- DistributedGraphDb (pure interface)
| |
Single Node +-----+------+
| |
Master Worker
```
DistributedGraphDb is used as an intermediate interface for all the
things that should work only in distributed. Therefore, virtual calls
for distributed stuff have been removed from GraphDb. Some are exposed
via DistributedGraphDb, other's are only in concrete Master and Worker
classes. The code which relied on those virtual calls has been
refactored to either use DistributedGraphDb, take a pointer to what is
actually needed or use dynamic_cast. Obviously, dynamic_cast is a
temporary solution and should be replaced with another mechanism (e.g.
virtual call, or some other function pointer style).
The cost of the above change is some code duplication in constructors
and destructors of classes. This duplication has a lot of little tweaks
that make it hard to generalize, not to mention that virtual calls do
not work in constructor and destructor. If we really care about
generalizing this, we should think about abandoning RAII in favor of
constructor + Init method.
The next steps for splitting the dependencies that seem logical are:
1) Split GraphDbAccessor implementation, either via inheritance or
passing in an implementation pointer. GraphDbAccessor should then
only be created by a virtual call on GraphDb.
2) Split Interpreter implementation. Besides allowing single node
interpreter to exist without depending on distributed, this will
enable the planner and operators to be correctly separated.
Reviewers: msantl, mferencevic, ipaljak
Reviewed By: msantl
Subscribers: dgleich, pullbot
Differential Revision: https://phabricator.memgraph.io/D1493
2018-07-19 23:00:50 +08:00
|
|
|
std::unique_ptr<impl::SingleNode> impl_;
|
2017-04-10 21:44:36 +08:00
|
|
|
|
2018-04-22 14:31:09 +08:00
|
|
|
std::unique_ptr<utils::Scheduler> snapshot_creator_;
|
Split GraphDb to distributed and single node files
Summary:
This change, hopefully, simplifies the implementation of different kinds
of GraphDb. The pimpl idiom is now simplified by removing all of the
crazy inheritance. Implementations classes are just plain data stores,
without any methods. The interface classes now have a more flat
hierarchy:
```
GraphDb (pure interface)
|
+----+---------- DistributedGraphDb (pure interface)
| |
Single Node +-----+------+
| |
Master Worker
```
DistributedGraphDb is used as an intermediate interface for all the
things that should work only in distributed. Therefore, virtual calls
for distributed stuff have been removed from GraphDb. Some are exposed
via DistributedGraphDb, other's are only in concrete Master and Worker
classes. The code which relied on those virtual calls has been
refactored to either use DistributedGraphDb, take a pointer to what is
actually needed or use dynamic_cast. Obviously, dynamic_cast is a
temporary solution and should be replaced with another mechanism (e.g.
virtual call, or some other function pointer style).
The cost of the above change is some code duplication in constructors
and destructors of classes. This duplication has a lot of little tweaks
that make it hard to generalize, not to mention that virtual calls do
not work in constructor and destructor. If we really care about
generalizing this, we should think about abandoning RAII in favor of
constructor + Init method.
The next steps for splitting the dependencies that seem logical are:
1) Split GraphDbAccessor implementation, either via inheritance or
passing in an implementation pointer. GraphDbAccessor should then
only be created by a virtual call on GraphDb.
2) Split Interpreter implementation. Besides allowing single node
interpreter to exist without depending on distributed, this will
enable the planner and operators to be correctly separated.
Reviewers: msantl, mferencevic, ipaljak
Reviewed By: msantl
Subscribers: dgleich, pullbot
Differential Revision: https://phabricator.memgraph.io/D1493
2018-07-19 23:00:50 +08:00
|
|
|
utils::Scheduler transaction_killer_;
|
2018-01-12 22:17:04 +08:00
|
|
|
};
|
2017-09-13 23:09:04 +08:00
|
|
|
|
2018-08-01 15:48:38 +08:00
|
|
|
class SingleNodeRecoveryTransanctions final
|
|
|
|
: public durability::RecoveryTransactions {
|
|
|
|
public:
|
|
|
|
explicit SingleNodeRecoveryTransanctions(SingleNode *db);
|
|
|
|
~SingleNodeRecoveryTransanctions();
|
|
|
|
|
|
|
|
void Begin(const tx::TransactionId &tx_id) override;
|
|
|
|
void Abort(const tx::TransactionId &tx_id) override;
|
|
|
|
void Commit(const tx::TransactionId &tx_id) override;
|
|
|
|
void Apply(const database::StateDelta &delta) override;
|
|
|
|
|
|
|
|
private:
|
|
|
|
SingleNode *db_;
|
|
|
|
std::unordered_map<tx::TransactionId, std::unique_ptr<GraphDbAccessor>>
|
|
|
|
accessors_;
|
|
|
|
};
|
|
|
|
|
2018-01-12 22:17:04 +08:00
|
|
|
} // namespace database
|