Add edge_types indexes.

Summary: Refactor label_index. Also add edge_type indexes.

Reviewers: mislav.bradac, florijan

Reviewed By: florijan

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D217
This commit is contained in:
Dominik Gleich 2017-04-05 17:23:00 +02:00
parent a0049c9691
commit 8c7ed8c186
9 changed files with 318 additions and 115 deletions

View File

@ -5,7 +5,7 @@
#include "data_structures/concurrent/concurrent_set.hpp"
#include "data_structures/concurrent/skiplist.hpp"
#include "database/graph_db_datatypes.hpp"
#include "database/indexes/labels_index.hpp"
#include "database/indexes/key_index.hpp"
#include "mvcc/version_list.hpp"
#include "storage/edge.hpp"
#include "storage/garbage_collector.hpp"
@ -67,5 +67,6 @@ class GraphDb {
ConcurrentSet<std::string> properties_;
// indexes
LabelsIndex<Vertex> labels_index_;
KeyIndex<GraphDbTypes::Label, Vertex> labels_index_;
KeyIndex<GraphDbTypes::EdgeType, Edge> edge_types_index_;
};

View File

@ -42,7 +42,16 @@ VertexAccessor GraphDbAccessor::insert_vertex() {
bool success = db_.vertices_.access().insert(vertex_vlist).second;
if (success) return VertexAccessor(*vertex_vlist, *this);
throw CreationException("Unable to create a Vertex after 5 attempts");
throw CreationException("Unable to create a Vertex.");
}
/**
 * Registers the vertex's version list in the labels index under `label`.
 * @param label - label whose index entry is updated
 * @param vertex_accessor - accessor of the vertex to register
 */
void GraphDbAccessor::update_label_index(
    const GraphDbTypes::Label &label, const VertexAccessor &vertex_accessor) {
  auto &labels_index = db_.labels_index_;
  labels_index.Update(label, vertex_accessor.vlist_);
}
/**
 * Forwards to the labels index and returns the number of entries it currently
 * holds for `label`.
 */
size_t GraphDbAccessor::vertices_count(const GraphDbTypes::Label &label) {
  auto &labels_index = db_.labels_index_;
  return labels_index.Count(label);
}
bool GraphDbAccessor::remove_vertex(VertexAccessor &vertex_accessor) {
@ -59,15 +68,16 @@ void GraphDbAccessor::detach_remove_vertex(VertexAccessor &vertex_accessor) {
for (auto edge_accessor : vertex_accessor.in()) remove_edge(edge_accessor);
vertex_accessor.SwitchNew();
for (auto edge_accessor : vertex_accessor.out()) remove_edge(edge_accessor);
vertex_accessor.vlist_->remove(vertex_accessor.SwitchNew().current_, *transaction_);
vertex_accessor.vlist_->remove(vertex_accessor.SwitchNew().current_,
*transaction_);
}
EdgeAccessor GraphDbAccessor::insert_edge(VertexAccessor &from,
VertexAccessor &to,
GraphDbTypes::EdgeType edge_type) {
// create an edge
auto edge_vlist = new mvcc::VersionList<Edge>(
*transaction_, *from.vlist_, *to.vlist_, edge_type);
auto edge_vlist = new mvcc::VersionList<Edge>(*transaction_, *from.vlist_,
*to.vlist_, edge_type);
// ensure that the "from" accessor has the latest version
from.SwitchNew();
@ -79,9 +89,25 @@ EdgeAccessor GraphDbAccessor::insert_edge(VertexAccessor &from,
to.update().in_.emplace_back(edge_vlist);
bool success = db_.edges_.access().insert(edge_vlist).second;
if (success) return EdgeAccessor(*edge_vlist, *this);
const auto edge_accessor = EdgeAccessor(*edge_vlist, *this);
if (success) {
// This has to be here because there is no single method that is always
// called to set the type. It's set here, and sometimes in the set_edge_type
// method.
update_edge_type_index(edge_type, edge_accessor);
return edge_accessor;
}
throw CreationException("Unable to create an Edge after 5 attempts");
throw CreationException("Unable to create an Edge.");
}
/**
 * Registers the edge's version list in the edge types index under
 * `edge_type`.
 * @param edge_type - edge type whose index entry is updated
 * @param edge_accessor - accessor of the edge to register
 */
void GraphDbAccessor::update_edge_type_index(
    const GraphDbTypes::EdgeType &edge_type,
    const EdgeAccessor &edge_accessor) {
  auto &edge_types_index = db_.edge_types_index_;
  edge_types_index.Update(edge_type, edge_accessor.vlist_);
}
/**
 * Forwards to the edge types index and returns the number of entries it
 * currently holds for `edge_type`.
 */
size_t GraphDbAccessor::edges_count(const GraphDbTypes::EdgeType &edge_type) {
  auto &edge_types_index = db_.edge_types_index_;
  return edge_types_index.Count(edge_type);
}
/**
@ -99,7 +125,8 @@ void swap_out_edge(std::vector<mvcc::VersionList<Edge> *> &edges,
void GraphDbAccessor::remove_edge(EdgeAccessor &edge_accessor) {
swap_out_edge(edge_accessor.from().update().out_, edge_accessor.vlist_);
swap_out_edge(edge_accessor.to().update().in_, edge_accessor.vlist_);
edge_accessor.vlist_->remove(edge_accessor.SwitchNew().current_, *transaction_);
edge_accessor.vlist_->remove(edge_accessor.SwitchNew().current_,
*transaction_);
}
GraphDbTypes::Label GraphDbAccessor::label(const std::string &label_name) {

View File

@ -90,6 +90,18 @@ class GraphDbAccessor {
std::move(accessors));
}
/**
* Return VertexAccessors for the vertices that the labels index yields for
* the given label under the current transaction's visibility.
*
* Every version list handed back by the labels index is wrapped in a
* VertexAccessor bound to this GraphDbAccessor.
* @param label - label for which to return VertexAccessors
* @return iterable collection of VertexAccessors
*/
auto vertices(const GraphDbTypes::Label &label) {
return iter::imap(
[this](auto vlist) { return VertexAccessor(*vlist, *this); },
db_.labels_index_.Acquire(label, *transaction_));
}
/**
* Creates a new Edge and returns an accessor to it.
*
@ -126,36 +138,49 @@ class GraphDbAccessor {
std::move(accessors));
}
/**
* Return EdgeAccessors for the edges that the edge types index yields for the
* given edge_type under the current transaction's visibility.
*
* Every version list handed back by the edge types index is wrapped in an
* EdgeAccessor bound to this GraphDbAccessor.
* @param edge_type - edge_type for which to return EdgeAccessors
* @return iterable collection of EdgeAccessors
*/
auto edges(const GraphDbTypes::EdgeType &edge_type) {
return iter::imap(
[this](auto vlist) { return EdgeAccessor(*vlist, *this); },
db_.edge_types_index_.Acquire(edge_type, *transaction_));
}
/**
* Insert this record into corresponding label index.
* @param label - label index into which to insert record
* @param record - record which to insert
*/
template <typename TRecord>
void update_index(const GraphDbTypes::Label &label, const TRecord &record) {
db_.labels_index_.Add(label, record.vlist_);
}
void update_label_index(const GraphDbTypes::Label &label,
const VertexAccessor &vertex_accessor);
/**
* Return VertexAccessors which contain the current label for the current
* transaction visibilty.
* @param label - label for which to return VertexAccessors
* @return iterable collection
* Insert this record into corresponding edge_type index.
* @param edge_type - edge_type index into which to insert record
* @param record - record which to insert
*/
auto vertices_by_label(const GraphDbTypes::Label &label) {
return iter::imap(
[this](auto vlist) { return VertexAccessor(*vlist, *this); },
db_.labels_index_.Acquire(label, *transaction_));
}
void update_edge_type_index(const GraphDbTypes::EdgeType &edge_type,
const EdgeAccessor &edge_accessor);
/**
* Return approximate number of vertices under indexes with the given label.
* Note that this is always an over-estimate and never an under-estimate.
* @param label - label to check for
* @return number of vertices with the given label
*/
size_t vertices_by_label_count(const GraphDbTypes::Label &label) {
return db_.labels_index_.Count(label);
}
size_t vertices_count(const GraphDbTypes::Label &label);
/**
* Return approximate number of edges under indexes with the given edge_type.
* Note that this is always an over-estimate and never an under-estimate.
* @param edge_type - edge_type to check for
* @return number of edges with the given edge_type
*/
size_t edges_count(const GraphDbTypes::EdgeType &edge_type);
/**
* Obtains the Label for the label's name.

View File

@ -0,0 +1,110 @@
#pragma once
#include "cppitertools/filter.hpp"
#include "cppitertools/imap.hpp"
#include "data_structures/concurrent/concurrent_map.hpp"
#include "database/graph_db.hpp"
#include "database/graph_db_datatypes.hpp"
#include "mvcc/version_list.hpp"
#include "storage/edge.hpp"
#include "storage/vertex.hpp"
#include "transactions/transaction.hpp"
/**
 * @brief Implements index update and acquire. Maps a key (a label or an edge
 * type) to the version lists of the records registered under that key.
 * @tparam TKey - underlying type by which to key objects
 * @tparam TRecord - object stored under the given key
 */
template <typename TKey, typename TRecord>
class KeyIndex {
 public:
  /**
   * @brief - Add vlist, if new, to TKey specific storage.
   * @param key - TKey index to update.
   * @param vlist - pointer to vlist entry to add.
   */
  void Update(const TKey &key, mvcc::VersionList<TRecord> *vlist) {
    GetKeyStorage(key)->access().insert(vlist);
  }

  /**
   * @brief - Acquire all the inserted vlists in TKey specific storage which
   * still have that key visible in this transaction.
   * @param key - key to query. It is copied into the returned collection, so
   * the caller may pass a temporary.
   * @param t - current transaction, which determines visibility. It is held
   * by reference and must outlive the returned collection.
   * @return iterable collection of vlists records<TRecord> with the requested
   * TKey.
   */
  auto Acquire(const TKey &key, const tx::Transaction &t) {
    auto index = GetKeyStorage(key);
    return iter::filter(
        // `key` is captured by value on purpose: the predicate runs when the
        // returned collection is iterated, which can be after the caller's
        // key argument (often a temporary) has been destroyed.
        [this, key, &t](auto vlist) {
          auto version = vlist->find(t);
          if (version == nullptr) return false;
          return Exists(key, version);
        },
        index->access());
  }

  /**
   * @brief - Return number of items in skiplist associated with the given
   * TKey. This number could be imprecise because of the underlying skiplist
   * storage. Use this as a hint, and not as a rule.
   * Moreover, some transaction probably sees only part of the skiplist since
   * not all versions are visible for it. Also, garbage collection might not
   * have been run for some time so the index might have accumulated garbage.
   * @param key - key to query for.
   * @return number of items
   */
  auto Count(const TKey &key) { return GetKeyStorage(key)->access().size(); }

 private:
  /**
   * @brief - Get storage for this key. Creates new storage if this key is not
   * yet indexed.
   * NOTE(review): nothing in this class frees the skiplists once they are
   * stored in index_ - confirm who owns them at shutdown.
   * @param key - key for which to access storage.
   * @return pointer to skiplist of version list records<TRecord>.
   */
  auto GetKeyStorage(const TKey &key) {
    auto access = index_.access();
    // Avoid excessive new/delete by first checking if it exists.
    auto iter = access.find(key);
    if (iter == access.end()) {
      auto skiplist = new SkipList<mvcc::VersionList<TRecord> *>;
      auto ret = access.insert(key, skiplist);
      // In case some other insert managed to create new skiplist we shouldn't
      // leak memory and should delete this one accordingly.
      if (ret.second == false) delete skiplist;
      return ret.first->second;
    }
    return iter->second;
  }

  /**
   * @brief - Check if Vertex contains label.
   * @param label - label to check for.
   * @param v - vertex record version to inspect.
   * @return true if it contains, false otherwise.
   */
  bool Exists(const GraphDbTypes::Label &label, const Vertex *v) const {
    // We have to check for existence of label because the transaction
    // might not see the label, or the label was deleted and not yet
    // removed from the index.
    // Bind by reference: this runs once per indexed record, so copying the
    // whole labels container here would be wasted work.
    const auto &labels = v->labels_;
    return std::find(labels.begin(), labels.end(), label) != labels.end();
  }

  /**
   * @brief - Check if Edge has edge_type.
   * @param edge_type - edge_type to check for.
   * @param e - edge record version to inspect.
   * @return true if it has that edge_type, false otherwise.
   */
  bool Exists(const GraphDbTypes::EdgeType &edge_type, const Edge *e) const {
    // We have to check for equality of edge types because the transaction
    // might not see the edge type, or the edge type was deleted and not yet
    // removed from the index.
    return e->edge_type_ == edge_type;
  }

  ConcurrentMap<TKey, SkipList<mvcc::VersionList<TRecord> *> *> index_;
};

View File

@ -1,82 +0,0 @@
#pragma once
#include "cppitertools/filter.hpp"
#include "cppitertools/imap.hpp"
#include "data_structures/concurrent/concurrent_map.hpp"
#include "database/graph_db.hpp"
#include "database/graph_db_datatypes.hpp"
#include "mvcc/version_list.hpp"
#include "transactions/transaction.hpp"
/**
 * @brief Implements index update and acquire. Maps a label to the version
 * lists of the records registered under that label.
 * @tparam T - underlying type in version list.
 */
template <typename T>
class LabelsIndex {
 public:
  /**
   * @brief - Add vlist, if new, to label specific storage.
   * @param label - label index to update.
   * @param vlist - pointer to vlist entry to add.
   */
  void Add(const GraphDbTypes::Label &label, mvcc::VersionList<T> *vlist) {
    GetLabel(label)->access().insert(vlist);
  }

  /**
   * @brief - Acquire all the inserted vlists in label specific storage which
   * still have that label visible in this transaction.
   * @param label - label to query. It is copied into the returned collection,
   * so the caller may pass a temporary.
   * @param t - current transaction, which determines visibility. It is held
   * by reference and must outlive the returned collection.
   * @return iterable collection of vlists records<T> with the requested label.
   */
  auto Acquire(const GraphDbTypes::Label &label, const tx::Transaction &t) {
    auto label_index = GetLabel(label);
    return iter::filter(
        // `label` is captured by value on purpose: the predicate runs when
        // the returned collection is iterated, which can be after the
        // caller's label argument (often a temporary) has been destroyed.
        [this, label, &t](auto vlist) {
          auto vlist_head = vlist->find(t);
          if (vlist_head == nullptr) return false;
          // Bind by reference instead of copying the labels container for
          // every indexed record.
          const auto &labels = vlist_head->labels_;
          // We have to check for existence of label because the transaction
          // might not see the label, or the label was deleted and not yet
          // removed from the index.
          return std::find(labels.begin(), labels.end(), label) != labels.end();
        },
        label_index->access());
  }

  /**
   * @brief - Return number of items in skiplist associated with the given
   * label. This number could be imprecise because of the underlying skiplist
   * storage. Use this as a hint, and not as a rule.
   * @param label - label to query for.
   * @return number of items
   */
  auto Count(const GraphDbTypes::Label &label) {
    return GetLabel(label)->access().size();
  }

 private:
  /**
   * @brief - Get storage for this label. Creates new
   * storage if this label is not yet indexed.
   * @param label - Label for which to access storage.
   * @return pointer to skiplist of version list records<T>.
   */
  auto GetLabel(const GraphDbTypes::Label &label) {
    auto access = index_.access();
    auto iter = access.find(label);
    if (iter == access.end()) {
      auto skiplist = new SkipList<mvcc::VersionList<T> *>;
      auto ret = access.insert(label, skiplist);
      // In case some other insert managed to create new skiplist we shouldn't
      // leak memory and should delete this one accordingly.
      if (ret.second == false) delete skiplist;
      return ret.first->second;
    }
    return iter->second;
  }

  ConcurrentMap<GraphDbTypes::Label, SkipList<mvcc::VersionList<T> *> *> index_;
};

View File

@ -1,7 +1,10 @@
#include "storage/edge_accessor.hpp"
#include "database/graph_db_accessor.hpp"
#include "storage/vertex_accessor.hpp"
GraphDbTypes::EdgeType EdgeAccessor::edge_type() const { return current().edge_type_; }
GraphDbTypes::EdgeType EdgeAccessor::edge_type() const {
return current().edge_type_;
}
VertexAccessor EdgeAccessor::from() const {
return VertexAccessor(current().from_, db_accessor());

View File

@ -16,7 +16,7 @@ bool VertexAccessor::add_label(GraphDbTypes::Label label) {
// not a duplicate label, add it
update().labels_.emplace_back(label);
this->db_accessor().update_index(label, *this);
this->db_accessor().update_label_index(label, *this);
return true;
}

View File

@ -0,0 +1,119 @@
#include "gmock/gmock.h"
#include "gtest/gtest.h"
#include "data_structures/ptr_int.hpp"
#include "database/graph_db_accessor.hpp"
#include "dbms/dbms.hpp"
using testing::UnorderedElementsAreArray;
// Checks that the per-edge_type counter never reports fewer edges than were
// actually inserted with that edge_type.
TEST(EdgeTypesIndex, Count) {
  Dbms dbms;
  auto accessor = dbms.active();
  const int kRounds = 50;
  size_t inserted_with_test_type = 0;
  for (int round = 0; round < kRounds; ++round) {
    auto from = accessor->insert_vertex();
    auto to = accessor->insert_vertex();
    // Randomly pick which of the two edge types this round's edge gets.
    const bool use_test_type = (rand() & 1) != 0;
    accessor->insert_edge(from, to,
                          accessor->edge_type(use_test_type ? "test" : "test2"));
    if (use_test_type) ++inserted_with_test_type;
    // The index count is documented as an over-estimate, so only a lower
    // bound can be asserted.
    EXPECT_GE(accessor->edges_count(accessor->edge_type("test")),
              inserted_with_test_type);
  }
}
// The inserting transaction hasn't ended, so the test expects the index to
// return no edges for the edge_type that was just used.
TEST(EdgeTypesIndex, AddGetZeroEdgeTypes) {
  Dbms dbms;
  auto accessor = dbms.active();
  auto from = accessor->insert_vertex();
  auto to = accessor->insert_vertex();
  accessor->insert_edge(from, to, accessor->edge_type("test"));

  auto indexed_edges = accessor->edges(accessor->edge_type("test"));
  std::vector<EdgeAccessor> materialized(indexed_edges.begin(),
                                         indexed_edges.end());
  EXPECT_EQ(materialized.size(), static_cast<size_t>(0));
}
// Test edge type index: insert two edges with edge_type "test" and one with
// an unrelated edge_type "test2", verify the index returns exactly the "test"
// edges, remove them, and verify the index no longer returns anything for
// "test".
// Registered under EdgeTypesIndex (not LabelsIndex) so that it is grouped and
// filtered together with the other edge type index tests.
TEST(EdgeTypesIndex, AddGetRemoveEdgeTypes) {
  Dbms dbms;
  {
    auto accessor = dbms.active();
    const auto test_type = accessor->edge_type("test");
    const auto test2_type = accessor->edge_type("test2");

    auto vertex11 = accessor->insert_vertex();
    auto vertex12 = accessor->insert_vertex();
    accessor->insert_edge(vertex11, vertex12, test_type);

    auto vertex21 = accessor->insert_vertex();
    auto vertex22 = accessor->insert_vertex();
    accessor->insert_edge(vertex21, vertex22, test2_type);

    auto vertex31 = accessor->insert_vertex();
    auto vertex32 = accessor->insert_vertex();
    accessor->insert_edge(vertex31, vertex32, test_type);

    accessor->commit();
  }  // Finish transaction.
  {
    auto accessor = dbms.active();
    // Keep the edge types in named locals: the collection returned by edges()
    // is consumed in a later statement, so the key handed to it must not be a
    // temporary.
    const auto test_type = accessor->edge_type("test");
    const auto test2_type = accessor->edge_type("test2");

    auto filtered = accessor->edges(test_type);
    std::vector<EdgeAccessor> collection(filtered.begin(), filtered.end());

    auto edges = accessor->edges();
    std::vector<EdgeAccessor> expected_collection;
    for (auto edge : edges) {
      if (edge.edge_type() == test_type) {
        expected_collection.push_back(edge);
      } else {
        EXPECT_TRUE(edge.edge_type() == test2_type);
      }
    }

    EXPECT_EQ(expected_collection.size(), collection.size());
    // Fatal check: the element accesses below would be out of bounds if the
    // index returned fewer than two edges.
    ASSERT_GE(collection.size(), (size_t)2);
    EXPECT_TRUE(collection[0].edge_type() == test_type);
    EXPECT_TRUE(collection[1].edge_type() == test_type);
    EXPECT_FALSE(collection[0].edge_type() == test2_type);
    EXPECT_FALSE(collection[1].edge_type() == test2_type);

    // Remove both from the database and test that the index won't return
    // them afterwards.
    accessor->remove_edge(collection[0]);
    accessor->remove_edge(collection[1]);
    accessor->commit();
  }
  {
    auto accessor = dbms.active();
    const auto test_type = accessor->edge_type("test");
    const auto test2_type = accessor->edge_type("test2");

    auto filtered = accessor->edges(test_type);
    std::vector<EdgeAccessor> collection(filtered.begin(), filtered.end());

    auto edges = accessor->edges();
    std::vector<EdgeAccessor> expected_collection;
    for (auto edge : edges) {
      if (edge.edge_type() == test_type) {
        expected_collection.push_back(edge);
      } else {
        EXPECT_TRUE(edge.edge_type() == test2_type);
      }
    }

    // It should be empty since everything with an old edge_type is either
    // deleted or doesn't have that edge_type anymore.
    EXPECT_EQ(expected_collection.size(), (size_t)0);
    EXPECT_EQ(collection.size(), (size_t)0);
  }
}
// Test binary entry point: lets GoogleTest consume its command-line flags and
// then runs every registered test, returning GoogleTest's result as the
// process exit status.
int main(int argc, char **argv) {
  ::testing::InitGoogleTest(&argc, argv);
  const int exit_status = RUN_ALL_TESTS();
  return exit_status;
}

View File

@ -23,7 +23,7 @@ TEST(LabelsIndex, Count) {
}
// Greater or equal since we said that we always estimate at least the
// real number.
EXPECT_GE(accessor->vertices_by_label_count(accessor->label("test")), cnt);
EXPECT_GE(accessor->vertices_count(accessor->label("test")), cnt);
}
}
@ -33,9 +33,9 @@ TEST(LabelsIndex, AddGetZeroLabels) {
auto accessor = dbms.active();
auto vertex = accessor->insert_vertex();
vertex.add_label(accessor->label("test"));
accessor->commit();
auto collection = accessor->vertices_by_label(accessor->label("test"));
std::vector<VertexAccessor> collection_vector;
auto collection = accessor->vertices(accessor->label("test"));
std::vector<VertexAccessor> collection_vector(collection.begin(),
collection.end());
EXPECT_EQ(collection_vector.size(), (size_t)0);
}
@ -60,7 +60,7 @@ TEST(LabelsIndex, AddGetRemoveLabel) {
{
auto accessor = dbms.active();
auto filtered = accessor->vertices_by_label(accessor->label("test"));
auto filtered = accessor->vertices(accessor->label("test"));
std::vector<VertexAccessor> collection(filtered.begin(), filtered.end());
auto vertices = accessor->vertices();
@ -89,7 +89,7 @@ TEST(LabelsIndex, AddGetRemoveLabel) {
{
auto accessor = dbms.active();
auto filtered = accessor->vertices_by_label(accessor->label("test"));
auto filtered = accessor->vertices(accessor->label("test"));
std::vector<VertexAccessor> collection(filtered.begin(), filtered.end());
auto vertices = accessor->vertices();