Vertex and Edge distributed storage support

Summary: Vertex and Edge now use Address for storing connections to other Edges and Vertices, to support distributed storage.

Reviewers: mislav.bradac, dgleich, buda

Reviewed By: mislav.bradac, dgleich

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D977
This commit is contained in:
florijan 2017-11-14 08:47:50 +01:00
parent 696d56a8fb
commit 463e86653d
12 changed files with 202 additions and 99 deletions

View File

@ -290,8 +290,8 @@ EdgeAccessor GraphDbAccessor::InsertEdge(
}
}
auto edge_vlist = new mvcc::VersionList<Edge>(*transaction_, id, *from.vlist_,
*to.vlist_, edge_type);
auto edge_vlist = new mvcc::VersionList<Edge>(*transaction_, id, from.vlist_,
to.vlist_, edge_type);
// We need to insert edge_vlist to edges_ before calling update since update
// can throw and edge_vlist will not be garbage collected if it is not in
// edges_ skiplist.

84
src/storage/address.hpp Normal file
View File

@ -0,0 +1,84 @@
#pragma once
#include <cstdint>
#include "glog/logging.h"
namespace storage {
/**
 * A data structure that tracks a Vertex/Edge location (address) that's either
 * local or remote. The remote address consists of a pair (shard_id, global_id),
 * while the local address is simply the memory address in the current local
 * process. Both types of address are stored in the same storage space, so an
 * Address always takes as much memory as a pointer does.
 *
 * The memory layout on the x64 architecture is the following:
 *  - the lowest bit stores 0 if the address is local and 1 if it is remote
 *  - if the address is local, all 64 bits store the local memory address
 *    (which is why the pointer's lowest bit has to be 0)
 *  - if the address is remote then:
 *    - the kShardIdSize bits in the [1, kShardIdSize] range contain the
 *      shard ID
 *    - the upper (64 - 1 - kShardIdSize) bits, which is the
 *      [1 + kShardIdSize, 63] range, contain the globally unique element ID
 *
 * @tparam TLocalObj - Type of the object a local address points to.
 */
template <typename TLocalObj>
class Address {
  // The bit layout below is hard-coded for 64 bits of storage; fail the build
  // instead of silently truncating remote IDs on other pointer widths.
  static_assert(sizeof(uintptr_t) == 8,
                "Address requires a 64-bit uintptr_t");

  static constexpr uintptr_t kTypeMask{1};
  static constexpr uintptr_t kLocal{0};
  static constexpr uintptr_t kRemote{1};
  static constexpr size_t kShardIdPos{1};
  // To modify memory layout only change kShardIdSize.
  static constexpr size_t kShardIdSize{10};
  static constexpr size_t kGlobalIdPos{kShardIdPos + kShardIdSize};
  static constexpr size_t kGlobalIdSize{64 - 1 - kShardIdSize};

 public:
  /**
   * Constructor for local Address. Intentionally implicit so that raw
   * pointers (including nullptr) convert to an Address.
   *
   * @param ptr - Pointer to the local object. Its lowest bit must be 0
   *              because that bit is used as the local/remote tag.
   */
  Address(TLocalObj *ptr) {
    uintptr_t ptr_no_type = reinterpret_cast<uintptr_t>(ptr);
    DCHECK((ptr_no_type & kTypeMask) == 0) << "Ptr has type_mask bit set";
    storage_ = ptr_no_type | kLocal;
  }

  /**
   * Constructor for remote Address.
   *
   * @param shard_id - Shard ID, must fit into kShardIdSize bits.
   * @param global_id - Globally unique element ID, must fit into
   *                    kGlobalIdSize bits.
   */
  Address(uint64_t shard_id, uint64_t global_id) {
    // TODO make a SSOT about max shard ID. Ensure that a shard with a larger ID
    // can't be created, and that this ID fits into kShardIdSize bits.
    // An N-bit field holds every value below 2^N, so that is the bound.
    CHECK(shard_id < (1ULL << kShardIdSize)) << "Shard ID too big.";
    CHECK(global_id < (1ULL << kGlobalIdSize))
        << "Global element ID too big.";

    storage_ = kRemote;
    storage_ |= shard_id << kShardIdPos;
    storage_ |= global_id << kGlobalIdPos;
  }

  /** Returns true if the lowest (tag) bit marks this address as local. */
  bool is_local() const { return (storage_ & kTypeMask) == kLocal; }

  /** Returns true if the lowest (tag) bit marks this address as remote. */
  bool is_remote() const { return (storage_ & kTypeMask) == kRemote; }

  /** Returns the stored pointer. Must only be called on a local address. */
  TLocalObj *local() const {
    DCHECK(is_local()) << "Attempting to get local address from global";
    return reinterpret_cast<TLocalObj *>(storage_);
  }

  /** Returns the shard ID. Must only be called on a remote address. */
  uint64_t shard_id() const {
    DCHECK(is_remote()) << "Attempting to get shard ID from local address";
    return (storage_ >> kShardIdPos) & ((1ULL << kShardIdSize) - 1);
  }

  /** Returns the global element ID. Must only be called on a remote address. */
  uint64_t global_id() const {
    DCHECK(is_remote()) << "Attempting to get global ID from local address";
    return (storage_ >> kGlobalIdPos) & ((1ULL << kGlobalIdSize) - 1);
  }

  /** Two addresses are equal when their raw 64-bit storage is equal. */
  bool operator==(const Address<TLocalObj> &other) const {
    return storage_ == other.storage_;
  }

  bool operator!=(const Address<TLocalObj> &other) const {
    return !(*this == other);
  }

 private:
  // Tagged storage: either a pointer (tag bit 0) or packed remote IDs (tag 1).
  uintptr_t storage_{0};
};
}

View File

@ -3,21 +3,24 @@
#include "database/graph_db_datatypes.hpp"
#include "mvcc/record.hpp"
#include "mvcc/version_list.hpp"
#include "storage/address.hpp"
#include "storage/property_value_store.hpp"
class Vertex;
class Edge : public mvcc::Record<Edge> {
using VertexAddress = storage::Address<mvcc::VersionList<Vertex>>;
public:
Edge(mvcc::VersionList<Vertex> &from, mvcc::VersionList<Vertex> &to,
GraphDbTypes::EdgeType edge_type)
Edge(VertexAddress from, VertexAddress to, GraphDbTypes::EdgeType edge_type)
: from_(from), to_(to), edge_type_(edge_type) {}
// Returns new Edge with copy of data stored in this Edge, but without
// copying superclass' members.
Edge *CloneData() { return new Edge(*this); }
mvcc::VersionList<Vertex> &from_;
mvcc::VersionList<Vertex> &to_;
VertexAddress from_;
VertexAddress to_;
GraphDbTypes::EdgeType edge_type_;
PropertyValueStore<GraphDbTypes::Property> properties_;
@ -29,4 +32,3 @@ class Edge : public mvcc::Record<Edge> {
edge_type_(other.edge_type_),
properties_(other.properties_) {}
};
;

View File

@ -9,24 +9,22 @@ GraphDbTypes::EdgeType EdgeAccessor::EdgeType() const {
}
VertexAccessor EdgeAccessor::from() const {
return VertexAccessor(current().from_, db_accessor());
return VertexAccessor(*current().from_.local(), db_accessor());
}
bool EdgeAccessor::from_is(const VertexAccessor &v) const {
return v.operator==(&current().from_);
return v == current().from_.local();
}
VertexAccessor EdgeAccessor::to() const {
return VertexAccessor(current().to_, db_accessor());
return VertexAccessor(*current().to_.local(), db_accessor());
}
bool EdgeAccessor::to_is(const VertexAccessor &v) const {
return v.operator==(&current().to_);
return v == current().to_.local();
}
bool EdgeAccessor::is_cycle() const {
return &current().to_ == &current().from_;
}
bool EdgeAccessor::is_cycle() const { return current().to_ == current().from_; }
std::ostream &operator<<(std::ostream &os, const EdgeAccessor &ea) {
os << "E[" << ea.db_accessor().EdgeTypeName(ea.EdgeType());

View File

@ -6,6 +6,7 @@
#include "database/graph_db_datatypes.hpp"
#include "glog/logging.h"
#include "mvcc/version_list.hpp"
#include "storage/address.hpp"
#include "utils/algorithm.hpp"
// forward declare Vertex and Edge because they need this data structure
@ -15,15 +16,15 @@ class Vertex;
/**
* A data structure that holds a number of edges. This implementation assumes
* that separate Edges instances are used for incoming and outgoing edges in a
* vertex (and consequently that edge pointers are unique in it).
* vertex (and consequently that edge Addresses are unique in it).
*/
class Edges {
using vertex_ptr_t = mvcc::VersionList<Vertex> *;
using edge_ptr_t = mvcc::VersionList<Edge> *;
using VertexAddress = storage::Address<mvcc::VersionList<Vertex>>;
using EdgeAddress = storage::Address<mvcc::VersionList<Edge>>;
struct Element {
vertex_ptr_t vertex;
edge_ptr_t edge;
VertexAddress vertex;
EdgeAddress edge;
GraphDbTypes::EdgeType edge_type;
};
@ -45,13 +46,13 @@ class Edges {
*
* @param iterator - Iterator in the underlying storage.
* @param end - End iterator in the underlying storage.
* @param vertex - The destination vertex vlist pointer. If nullptr the
* @param vertex - The destination vertex address. If empty the
* edges are not filtered on destination.
* @param edge_types - The edge types at least one of which must be matched.
* If nullptr edges are not filtered on type.
*/
Iterator(std::vector<Element>::const_iterator position,
std::vector<Element>::const_iterator end, vertex_ptr_t vertex,
std::vector<Element>::const_iterator end, VertexAddress vertex,
const std::vector<GraphDbTypes::EdgeType> *edge_types)
: position_(position),
end_(end),
@ -79,21 +80,19 @@ class Edges {
// end_ is used only in update_position() to limit find.
std::vector<Element>::const_iterator end_;
// Optional predicates. If set they define which edges are skipped by
// the
// Optional predicates. If set they define which edges are skipped by the
// iterator. Only one can be not-null in the current implementation.
vertex_ptr_t vertex_{nullptr};
VertexAddress vertex_{nullptr};
// For edge types we use a vector pointer because it's optional.
const std::vector<GraphDbTypes::EdgeType> *edge_types_ = nullptr;
/** Helper function that skips edges that don't satisfy the predicate
* present in this iterator. */
void update_position() {
if (vertex_) {
position_ = std::find_if(position_,
end_, [v = this->vertex_](const Element &e) {
return e.vertex == v;
});
if (vertex_.local()) {
position_ = std::find_if(
position_, end_,
[v = this->vertex_](const Element &e) { return e.vertex == v; });
}
if (edge_types_) {
position_ = std::find_if(position_, end_, [this](const Element &e) {
@ -112,7 +111,7 @@ class Edges {
* @param edge - The edge.
* @param edge_type - Type of the edge.
*/
void emplace(vertex_ptr_t vertex, edge_ptr_t edge,
void emplace(VertexAddress vertex, EdgeAddress edge,
GraphDbTypes::EdgeType edge_type) {
storage_.emplace_back(Element{vertex, edge, edge_type});
}
@ -120,7 +119,7 @@ class Edges {
/**
* Removes an edge from this structure.
*/
void RemoveEdge(edge_ptr_t edge) {
void RemoveEdge(EdgeAddress edge) {
auto found = std::find_if(
storage_.begin(), storage_.end(),
[edge](const Element &element) { return edge == element.edge; });
@ -137,12 +136,12 @@ class Edges {
* Creates a beginning iterator that will skip edges whose destination
* vertex is not equal to the given vertex.
*
* @param vertex - The destination vertex vlist pointer. If nullptr the
* @param vertex - The destination vertex Address. If empty the
* edges are not filtered on destination.
* @param edge_types - The edge types at least one of which must be matched.
* If nullptr edges are not filtered on type.
*/
auto begin(vertex_ptr_t vertex,
auto begin(VertexAddress vertex,
const std::vector<GraphDbTypes::EdgeType> *edge_types) const {
if (edge_types && edge_types->empty()) edge_types = nullptr;
return Iterator(storage_.begin(), storage_.end(), vertex, edge_types);

View File

@ -1,12 +1,12 @@
#pragma once
#include "glog/logging.h"
#include "database/graph_db_datatypes.hpp"
#include "mvcc/version_list.hpp"
#include "storage/property_value.hpp"
#include "utils/total_ordering.hpp"
#include "glog/logging.h"
#include "storage/property_value_store.hpp"
#include "utils/total_ordering.hpp"
class GraphDbAccessor;

View File

@ -1,43 +0,0 @@
#pragma once
#include <cppitertools/reversed.hpp>
#include "cppitertools/imap.hpp"
/**
* Converts a (beginning, end) pair of iterators into an iterable that can be
* passed on to itertools. */
template <typename TIterator>
class Iterable {
public:
Iterable(TIterator &&begin, TIterator &&end)
: begin_(std::forward<TIterator>(begin)),
end_(std::forward<TIterator>(end)) {}
auto begin() { return begin_; };
auto end() { return end_; };
private:
TIterator begin_;
TIterator end_;
};
/**
* Creates an iterator over record accessors (Edge or Vertex).
*
* @param begin Start iterator over (vertex_vlist_ptr, edge_vlist_ptr) pairs.
* @param end End iterator over (vertex_vlist_ptr, edge_vlist_ptr) pairs.
* @param db_accessor A database accessor to create the record accessors with.
*
* @tparam TAccessor The exact type of accessor.
* @tparam TIterable An iterable of pointers to version list objects.
*/
template <typename TAccessor, typename TIterator>
auto MakeAccessorIterator(TIterator &&begin, TIterator &&end,
GraphDbAccessor &db_accessor) {
return iter::imap(
[&db_accessor](auto &edges_element) {
return TAccessor(*edges_element.edge, db_accessor);
},
Iterable<TIterator>(std::forward<TIterator>(begin),
std::forward<TIterator>(end)));
}

View File

@ -2,6 +2,8 @@
#include "database/graph_db_datatypes.hpp"
#include "mvcc/record.hpp"
#include "mvcc/version_list.hpp"
#include "storage/address.hpp"
#include "storage/edges.hpp"
#include "storage/property_value_store.hpp"

View File

@ -3,7 +3,6 @@
#include <algorithm>
#include "database/graph_db_accessor.hpp"
#include "storage/util.hpp"
#include "utils/algorithm.hpp"
size_t VertexAccessor::out_degree() const { return current().out_.size(); }

View File

@ -5,11 +5,12 @@
#include <vector>
#include "cppitertools/chain.hpp"
#include "cppitertools/imap.hpp"
#include "storage/record_accessor.hpp"
#include "storage/util.hpp"
#include "storage/vertex.hpp"
#include "storage/edge_accessor.hpp"
#include "storage/record_accessor.hpp"
#include "storage/vertex.hpp"
#include "utils/algorithm.hpp"
/**
* Provides ways for the client programmer (i.e. code generated
@ -19,6 +20,19 @@
* takes care of MVCC versioning.
*/
class VertexAccessor : public RecordAccessor<Vertex> {
// Helper function for creating an iterator over edges.
template <typename TIterator>
static inline auto MakeAccessorIterator(TIterator &&begin, TIterator &&end,
GraphDbAccessor &db_accessor) {
return iter::imap(
[&db_accessor](auto &edges_element) {
// Currently only local storage is supported.
return EdgeAccessor(*edges_element.edge.local(), db_accessor);
},
utils::Iterable<TIterator>(std::forward<TIterator>(begin),
std::forward<TIterator>(end)));
}
public:
using RecordAccessor::RecordAccessor;
@ -66,8 +80,8 @@ class VertexAccessor : public RecordAccessor<Vertex> {
* Returns EdgeAccessors for all incoming edges.
*/
auto in() const {
return MakeAccessorIterator<EdgeAccessor>(
current().in_.begin(), current().in_.end(), db_accessor());
return MakeAccessorIterator(current().in_.begin(), current().in_.end(),
db_accessor());
}
/**
@ -80,9 +94,8 @@ class VertexAccessor : public RecordAccessor<Vertex> {
auto in(
const VertexAccessor &dest,
const std::vector<GraphDbTypes::EdgeType> *edge_types = nullptr) const {
return MakeAccessorIterator<EdgeAccessor>(
current().in_.begin(dest.vlist_, edge_types), current().in_.end(),
db_accessor());
return MakeAccessorIterator(current().in_.begin(dest.vlist_, edge_types),
current().in_.end(), db_accessor());
}
/**
@ -92,17 +105,16 @@ class VertexAccessor : public RecordAccessor<Vertex> {
* or empty, the parameter is ignored.
*/
auto in(const std::vector<GraphDbTypes::EdgeType> *edge_types) const {
return MakeAccessorIterator<EdgeAccessor>(
current().in_.begin(nullptr, edge_types), current().in_.end(),
db_accessor());
return MakeAccessorIterator(current().in_.begin(nullptr, edge_types),
current().in_.end(), db_accessor());
}
/**
* Returns EdgeAccessors for all outgoing edges.
*/
auto out() const {
return MakeAccessorIterator<EdgeAccessor>(
current().out_.begin(), current().out_.end(), db_accessor());
return MakeAccessorIterator(current().out_.begin(), current().out_.end(),
db_accessor());
}
/**
@ -116,9 +128,8 @@ class VertexAccessor : public RecordAccessor<Vertex> {
auto out(
const VertexAccessor &dest,
const std::vector<GraphDbTypes::EdgeType> *edge_types = nullptr) const {
return MakeAccessorIterator<EdgeAccessor>(
current().out_.begin(dest.vlist_, edge_types), current().out_.end(),
db_accessor());
return MakeAccessorIterator(current().out_.begin(dest.vlist_, edge_types),
current().out_.end(), db_accessor());
}
/**
@ -128,9 +139,8 @@ class VertexAccessor : public RecordAccessor<Vertex> {
* or empty, the parameter is ignored.
*/
auto out(const std::vector<GraphDbTypes::EdgeType> *edge_types) const {
return MakeAccessorIterator<EdgeAccessor>(
current().out_.begin(nullptr, edge_types), current().out_.end(),
db_accessor());
return MakeAccessorIterator(current().out_.begin(nullptr, edge_types),
current().out_.end(), db_accessor());
}
};

View File

@ -103,4 +103,23 @@ template <typename TIterable, typename TElement>
inline bool Contains(const TIterable &iterable, const TElement &element) {
return std::find(iterable.begin(), iterable.end(), element) != iterable.end();
}
/**
 * Converts a (beginning, end) pair of iterators into an iterable that can be
 * passed on to itertools.
 *
 * @tparam TIterator - Type of the two stored iterators.
 */
template <typename TIterator>
class Iterable {
 public:
  /**
   * @param begin - Iterator to the first element of the range.
   * @param end - Iterator one past the last element of the range.
   */
  Iterable(TIterator &&begin, TIterator &&end)
      : begin_(std::forward<TIterator>(begin)),
        end_(std::forward<TIterator>(end)) {}

  // The accessors only hand out copies of the stored iterators, so they are
  // const-qualified; this allows iterating over a const Iterable as well.
  auto begin() const { return begin_; }
  auto end() const { return end_; }

 private:
  TIterator begin_;
  TIterator end_;
};
} // namespace utils

View File

@ -0,0 +1,33 @@
#include "gtest/gtest.h"
#include "storage/address.hpp"
using storage::Address;
// An Address built from a raw pointer reports itself as local (and not
// remote) and hands back the very same pointer.
TEST(Address, Local) {
  std::string target{"bla"};
  Address<std::string> local_address(&target);

  EXPECT_TRUE(local_address.is_local());
  EXPECT_FALSE(local_address.is_remote());
  EXPECT_EQ(local_address.local(), &target);
}
// Addresses built from the same pointer compare equal; addresses built from
// different pointers do not.
TEST(Address, CopyCompare) {
  int first = 12;
  int second = 13;
  Address<int> first_address{&first};

  EXPECT_EQ(Address<int>{&first}, first_address);
  EXPECT_FALSE(Address<int>{&second} == first_address);
}
// An Address built from a (shard ID, global ID) pair reports itself as remote
// (and not local) and returns both IDs unchanged.
TEST(Address, Global) {
  uint64_t expected_shard{13};
  uint64_t expected_global{31};
  Address<int> remote_address{expected_shard, expected_global};

  EXPECT_TRUE(remote_address.is_remote());
  EXPECT_FALSE(remote_address.is_local());
  EXPECT_EQ(remote_address.shard_id(), expected_shard);
  EXPECT_EQ(remote_address.global_id(), expected_global);
}