memgraph/src/database/indexes/label_property_index.hpp
Dominik Gleich 842901ecd2 LabelProperty index.
Summary:
Add return values.

After merge.

Initial working version. Still missing comments.

Update documentation.

Add checking for previous vlist and value equality.

After merge.

Remove functor, add boolean function.

Build index.

More functionality. Start implementing tests.

Add tests.

Reviewers: matej.gradicek, mislav.bradac, mferencevic, buda, florijan

Reviewed By: mislav.bradac, buda, florijan

Subscribers: lion, florijan, teon.banek, buda, pullbot

Differential Revision: https://phabricator.memgraph.io/D355
2017-05-16 16:33:20 +02:00

372 lines
14 KiB
C++

#pragma once

#include <algorithm>

#include "data_structures/concurrent/concurrent_map.hpp"
#include "database/graph_db.hpp"
#include "database/graph_db_datatypes.hpp"
#include "database/indexes/index_utils.hpp"
#include "mvcc/version_list.hpp"
#include "storage/edge.hpp"
#include "storage/vertex.hpp"
#include "transactions/transaction.hpp"
#include "utils/total_ordering.hpp"
/**
* @brief Implements LabelPropertyIndex.
*/
class LabelPropertyIndex {
public:
LabelPropertyIndex(){};
LabelPropertyIndex(const LabelPropertyIndex &other) = delete;
LabelPropertyIndex(LabelPropertyIndex &&other) = delete;
LabelPropertyIndex &operator=(const LabelPropertyIndex &other) = delete;
LabelPropertyIndex &operator=(LabelPropertyIndex &&other) = delete;
/**
* @brief - Clear all indices so that we don't leak memory.
*/
~LabelPropertyIndex() {
for (auto key_indices_pair : indices_.access()) {
// Delete skiplist because we created it with a new operator.
delete key_indices_pair.second;
}
}
/**
* @brief - Contain Label + property, to be used as an index key.
*/
class Key : public TotalOrdering<Key> {
public:
const GraphDbTypes::Label label_;
const GraphDbTypes::Property property_;
Key(const GraphDbTypes::Label &label,
const GraphDbTypes::Property &property)
: label_(label), property_(property) {}
// Comparison operators - we need them to keep this sorted inside skiplist.
bool operator<(const Key &other) const {
if (this->label_ != other.label_) return this->label_ < other.label_;
return this->property_ < other.property_;
}
bool operator==(const Key &other) const {
return this->label_ == other.label_ && this->property_ == other.property_;
}
};
/**
* @brief - Creates index with the given key if it doesn't exist. Note that
* you still need to populate the index with existing records.
* @return - True if it created the index, false if it already exists.
*/
bool CreateIndex(const Key &key) {
auto access = indices_.access();
// Avoid creation if it already exists.
auto iter = access.find(key);
if (iter != access.end()) return false;
auto skiplist = new SkipList<IndexEntry>;
auto ret = access.insert(key, skiplist);
// Avoid multithreaded memory leak if we don't delete skiplist and fail the
// insert (some other thread already inserted)
if (ret.second == false) delete skiplist;
return ret.second;
}
/**
* @brief - Notify that the index has been populated with everything it should
* be populated with, and can be used from this moment forward without missing
* any records.
* @param key - index which finished being populated.
*/
void IndexFinishedBuilding(const Key &key) {
ready_for_use_.access().insert(key);
}
/**
* @brief - Updates all indexes which should contain this vertex.
* @param vlist - pointer to vlist entry to add
* @param vertex - pointer to vertex record entry to add (contained in vlist)
*/
void UpdateOnLabelProperty(mvcc::VersionList<Vertex> *const vlist,
const Vertex *const vertex) {
const auto &labels = vertex->labels_;
for (auto index : indices_.access()) {
// Vertex has the given label
if (std::find(labels.begin(), labels.end(), index.first.label_) ==
labels.end())
continue;
auto prop = vertex->properties_.at(index.first.property_);
if (prop.type() != PropertyValue::Type::Null) {
// Property exists and vertex should be added to skiplist.
Insert(*index.second, prop, vlist, vertex);
}
}
}
/**
* @brief - Updates all indexes with `label` and any property in `vertex` that
* exists.
* @param label - indexes with this label might be updated if vertex contains
* the corresponding property.
* @param vlist - pointer to vlist entry to add
* @param vertex - pointer to vertex record entry to add (contained in vlist)
*/
void UpdateOnLabel(const GraphDbTypes::Label &label,
mvcc::VersionList<Vertex> *const vlist,
const Vertex *const vertex) {
for (auto index : indices_.access()) {
if (index.first.label_ != label) continue;
auto prop = vertex->properties_.at(index.first.property_);
if (prop.type() != PropertyValue::Type::Null) {
// Property exists and vertex should be added to skiplist.
Insert(*index.second, prop, vlist, vertex);
}
}
}
/**
* @brief - Updates all indexes with `property` and any label in `vertex` that
* exists.
* @param property - indexes with this property might be updated if vertex
* contains the corresponding label.
* @param vlist - pointer to vlist entry to add
* @param vertex - pointer to vertex record entry to add (contained in vlist)
*/
void UpdateOnProperty(const GraphDbTypes::Property &property,
mvcc::VersionList<Vertex> *const vlist,
const Vertex *const vertex) {
const auto &labels = vertex->labels_;
for (auto index : indices_.access()) {
if (index.first.property_ != property) continue;
if (std::find(labels.begin(), labels.end(), index.first.label_) !=
labels.end()) {
// Label exists and vertex should be added to skiplist.
Insert(*index.second, vertex->properties_.at(property), vlist, vertex);
}
}
}
/**
* @brief - Get all the inserted vlists in key specific storage which still
* have that label and property visible in this transaction.
* @param key - Label+Property to query.
* @param t - current transaction, which determines visibility.
* @param current_state If true then the graph state for the
* current transaction+command is returned (insertions, updates and
* deletions performed in the current transaction+command are not
* ignored).
* @return iterable collection of vlists of vertex records with the requested
* key sorted ascendingly by the property value.
*/
auto GetVlists(const Key &key, const tx::Transaction &t,
bool current_state = false) {
debug_assert(ready_for_use_.access().contains(key), "Index not yet ready.");
return IndexUtils::GetVlists<IndexEntry, Vertex>(
*GetKeyStorage(key), t,
[this, key](const IndexEntry &entry, const Vertex *const vertex) {
return Exists(key, entry.value_, vertex);
},
current_state);
}
/**
* @brief - Check for existance of index.
* @param key - Index key
* @return true if the index with that key exists
*/
bool IndexExists(const Key &key) {
return ready_for_use_.access().contains(key);
}
/**
* @brief - Return number of items in skiplist associated with the given
* key. This number could be imprecise because of the underlying skiplist
* storage. Use this as a hint, and not as a rule. Fails if index doesn't
* exist.
* Moreover, some transaction probably sees only part of the skiplist since
* not all versions are visible for it. Also, garbage collection might now
* have been run for some time so the index might have accumulated garbage.
* @param key - key to query for.
* @return number of items
*/
size_t Count(const Key &key) {
auto index = GetKeyStorage(key);
permanent_assert(index != nullptr, "Index doesn't exist.");
debug_assert(ready_for_use_.access().contains(key), "Index not yet ready.");
return index->access().size();
}
/**
* @brief - Removes from the index all entries for which records don't contain
* the given label anymore, or the record was deleted before this transaction
* id.
* @param id - oldest active id, safe to remove everything deleted before this
* id.
*/
void Refresh(const Id &id, tx::Engine &engine) {
return IndexUtils::Refresh<Key, IndexEntry, Vertex>(
indices_, id, engine, [this](const Key &key, const IndexEntry &entry) {
return Exists(key, entry.value_, entry.record_);
});
}
private:
/**
* @brief - Contains value, vlist and vertex record to distinguish between
* index entries.
*/
class IndexEntry : public TotalOrdering<IndexEntry> {
public:
IndexEntry(const IndexEntry &entry, const Vertex *new_record)
: IndexEntry(entry.value_, entry.vlist_, new_record) {}
IndexEntry(const PropertyValue &value, mvcc::VersionList<Vertex> *vlist,
const Vertex *record)
: value_(value), vlist_(vlist), record_(record) {}
// Comparision operators - we need them to keep this sorted inside
// skiplist.
bool operator<(const IndexEntry &other) const {
bool this_value_smaller = Cmp(this->value_, other.value_);
if (this_value_smaller || Cmp(other.value_, this->value_))
return this_value_smaller;
if (this->vlist_ != other.vlist_) return this->vlist_ < other.vlist_;
return this->record_ < other.record_;
}
bool operator==(const IndexEntry &other) const {
return !(*this < other) && !(other < *this);
}
/**
* @brief - For two property values - orders the records by type and then by
* value. Except for integers and doubles - those are both converted to
* double and then compared.
* @return true if the first property value is smaller( should be before)
* than the second one
*/
static bool Cmp(const PropertyValue &a, const PropertyValue &b) {
if (a.type() != b.type() &&
!(IsCastableToDouble(a) && IsCastableToDouble(b)))
return a.type() < b.type();
if (a.type() == b.type()) {
switch (a.type()) {
case PropertyValue::Type::Null:
return false;
case PropertyValue::Type::String:
return a.Value<std::string>() < b.Value<std::string>();
case PropertyValue::Type::Bool:
return a.Value<bool>() < b.Value<bool>();
case PropertyValue::Type::Int:
return a.Value<int64_t>() < b.Value<int64_t>();
case PropertyValue::Type::Double:
return a.Value<double>() < b.Value<double>();
case PropertyValue::Type::List: {
auto va = a.Value<std::vector<PropertyValue>>();
auto vb = b.Value<std::vector<PropertyValue>>();
if (va.size() != vb.size()) return va.size() < vb.size();
return lexicographical_compare(va.begin(), va.end(), vb.begin(),
vb.end(), Cmp);
}
default:
permanent_fail("Unimplemented type operator.");
}
}
// Types are int and double - convert int to double
return GetDouble(a) < GetDouble(b);
}
/**
* @brief - Return value casted to double. This is only possible for
* integers and doubles.
*/
static double GetDouble(const PropertyValue &value) {
debug_assert(value.type() == PropertyValue::Type::Int ||
value.type() == PropertyValue::Type::Double,
"Invalid data type.");
if (value.type() == PropertyValue::Type::Int)
return static_cast<double>(value.Value<int64_t>());
return value.Value<double>();
}
/**
* @brief - Return if this value is castable to double (returns true for
* integers and doubles).
*/
static bool IsCastableToDouble(const PropertyValue &value) {
return value.type() == PropertyValue::Type::Int ||
value.type() == PropertyValue::Type::Double;
}
/**
* @brief - Check if previous IndexEntry represents the same vlist/value
* pair.
* @return - true if IndexEntries are equal by the vlist/value pair.
*/
bool IsAlreadyChecked(const IndexEntry &previous) const {
return previous.vlist_ == this->vlist_ &&
!Cmp(previous.value_, this->value_) &&
!Cmp(this->value_, previous.value_);
}
const PropertyValue value_;
mvcc::VersionList<Vertex> *const vlist_{nullptr};
const Vertex *const record_{nullptr};
};
/**
* @brief - Insert value, vlist, vertex into corresponding index (key) if the
* index exists.
* @param index - into which index to add
* @param value - value which to add
* @param vlist - pointer to vlist entry to add
* @param vertex - pointer to vertex record entry to add (contained in vlist)
*/
void Insert(SkipList<IndexEntry> &index, const PropertyValue &value,
mvcc::VersionList<Vertex> *const vlist,
const Vertex *const vertex) {
index.access().insert(IndexEntry(value, vlist, vertex));
}
/**
* @brief - Get storage for this key.
* @param key - Label and and property for which to query.
* @return pointer to skiplist of IndexEntries, if none which matches key
* exists return nullptr
*/
SkipList<IndexEntry> *GetKeyStorage(const Key &key) {
auto access = indices_.access();
auto iter = access.find(key);
if (iter == access.end()) return nullptr;
return iter->second;
}
/**
* @brief - Check if Vertex contains label and property with the given
* value.
* @param key - label and parameter to check for.
* @param value - value of parameter to compare
* @return true if it contains, false otherwise.
*/
bool Exists(const Key &key, const PropertyValue &value,
const Vertex *const v) const {
debug_assert(v != nullptr, "Vertex is nullptr.");
// We have to check for existance of label because the transaction
// might not see the label, or the label was deleted and not yet
// removed from the index.
const auto &labels = v->labels_;
if (std::find(labels.begin(), labels.end(), key.label_) == labels.end())
return false;
auto prop = v->properties_.at(key.property_);
// Property doesn't exists.
if (prop.type() == PropertyValue::Type::Null) return false;
// Property value is the same as expected.
return !IndexEntry::Cmp(prop, value) && !IndexEntry::Cmp(value, prop);
}
ConcurrentMap<Key, SkipList<IndexEntry> *> indices_;
ConcurrentSet<Key> ready_for_use_;
};