memgraph/src/database/indexes/label_property_index.hpp
florijan 813d37e939 Migrate db::types to storage::
Reviewers: teon.banek, dgleich

Reviewed By: teon.banek, dgleich

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D1110
2018-01-17 10:35:12 +01:00

546 lines
22 KiB
C++

#pragma once
#include <algorithm>
#include <cstdint>
#include <experimental/optional>
#include <functional>
#include <limits>
#include <map>
#include <string>
#include <vector>

#include "data_structures/concurrent/concurrent_map.hpp"
#include "database/graph_db.hpp"
#include "database/indexes/index_common.hpp"
#include "mvcc/version_list.hpp"
#include "storage/edge.hpp"
#include "storage/types.hpp"
#include "storage/vertex.hpp"
#include "transactions/transaction.hpp"
#include "utils/bound.hpp"
#include "utils/total_ordering.hpp"
namespace database {
/**
 * @brief Implements LabelPropertyIndex.
 * Currently this provides implementation for:
 *   - acquiring all entries which contain the given label and a given
 *     property, sorted by the property value;
 *   - acquiring all non-unique entries with the given label and property
 *     that have exactly one (given) property value.
 */
class LabelPropertyIndex {
public:
// Default-constructible only. The destructor frees the skiplists stored in
// indices_, so copying and moving are disabled.
LabelPropertyIndex() = default;
LabelPropertyIndex(const LabelPropertyIndex &) = delete;
LabelPropertyIndex(LabelPropertyIndex &&) = delete;
LabelPropertyIndex &operator=(const LabelPropertyIndex &) = delete;
LabelPropertyIndex &operator=(LabelPropertyIndex &&) = delete;
/**
 * @brief - Releases every per-key skiplist held by this index so that no
 * memory is leaked.
 */
~LabelPropertyIndex() {
  // The skiplists are allocated with `new` in CreateIndex, so they have to be
  // freed by hand here.
  auto accessor = indices_.access();
  for (auto entry : accessor) delete entry.second;
}
/**
 * @brief - Pair of label and property that identifies a single index.
 */
class Key : public TotalOrdering<Key> {
 public:
  const storage::Label label_;
  const storage::Property property_;

  Key(storage::Label label, storage::Property property)
      : label_(label), property_(property) {}

  // Ordering is needed because keys are stored in a sorted structure: order
  // by label first and use the property only to break ties.
  bool operator<(const Key &other) const {
    if (label_ == other.label_) return property_ < other.property_;
    return label_ < other.label_;
  }

  // Two keys are equal only if both the label and the property match.
  bool operator==(const Key &other) const {
    if (!(label_ == other.label_)) return false;
    return property_ == other.property_;
  }
};
/**
 * @brief - Registers an (initially empty) index for the given key unless one
 * is already registered. Populating the index with existing records is left
 * to the caller.
 * @param key - Label+Property identifying the index.
 * @return - True if this call created the index, false if it already existed.
 */
bool CreateIndex(const Key &key) {
  auto accessor = indices_.access();
  // Nothing to do when the key is already registered.
  if (accessor.find(key) != accessor.end()) return false;
  auto *storage = new SkipList<IndexEntry>;
  const bool inserted = accessor.insert(key, storage).second;
  // Some other thread may have registered the same key between the lookup and
  // the insert; the skiplist allocated here is then unused and must be freed
  // to avoid a leak.
  if (!inserted) delete storage;
  return inserted;
}
/**
 * @brief - Marks the index as fully populated, i.e. from now on it can be
 * queried without missing any records.
 * @param key - index which finished being populated.
 */
void IndexFinishedBuilding(const Key &key) {
  auto ready = ready_for_use_.access();
  ready.insert(key);
}
/**
 * @brief - Updates all indexes which should contain this vertex, i.e. every
 * index whose label the vertex has and whose property is set (non-Null) on
 * the vertex.
 * @param vlist - pointer to vlist entry to add
 * @param vertex - pointer to vertex record entry to add (contained in vlist)
 */
void UpdateOnLabelProperty(mvcc::VersionList<Vertex> *const vlist,
                           const Vertex *const vertex) {
  const auto &labels = vertex->labels_;
  for (const auto &index : indices_.access()) {
    // Skip indexes whose label is not present on the vertex.
    if (std::find(labels.begin(), labels.end(), index.first.label_) ==
        labels.end())
      continue;
    // Bind by const reference: the value is only inspected here and Insert
    // makes its own copy when it builds the IndexEntry.
    const auto &prop = vertex->properties_.at(index.first.property_);
    if (prop.type() != PropertyValue::Type::Null) {
      // Property exists and vertex should be added to skiplist.
      Insert(*index.second, prop, vlist, vertex);
    }
  }
}
/**
 * @brief - Updates all indexes with `label` whose property is set (non-Null)
 * on `vertex`.
 * @param label - indexes with this label might be updated if vertex contains
 * the corresponding property.
 * @param vlist - pointer to vlist entry to add
 * @param vertex - pointer to vertex record entry to add (contained in vlist)
 */
void UpdateOnLabel(storage::Label label,
                   mvcc::VersionList<Vertex> *const vlist,
                   const Vertex *const vertex) {
  for (const auto &index : indices_.access()) {
    if (index.first.label_ != label) continue;
    // Bind by const reference: the value is only inspected here and Insert
    // makes its own copy when it builds the IndexEntry.
    const auto &prop = vertex->properties_.at(index.first.property_);
    if (prop.type() != PropertyValue::Type::Null) {
      // Property exists and vertex should be added to skiplist.
      Insert(*index.second, prop, vlist, vertex);
    }
  }
}
/**
 * @brief - Adds the vertex to every index on `property` whose label is
 * present on `vertex`.
 * @param property - indexes with this property might be updated if vertex
 * contains the corresponding label.
 * @param vlist - pointer to vlist entry to add
 * @param vertex - pointer to vertex record entry to add (contained in vlist)
 */
void UpdateOnProperty(storage::Property property,
                      mvcc::VersionList<Vertex> *const vlist,
                      const Vertex *const vertex) {
  const auto &vertex_labels = vertex->labels_;
  for (auto index : indices_.access()) {
    const auto &index_key = index.first;
    if (index_key.property_ != property) continue;
    const bool has_label =
        std::find(vertex_labels.begin(), vertex_labels.end(),
                  index_key.label_) != vertex_labels.end();
    if (!has_label) continue;
    // Both label and property match this index, so record the vertex in it.
    Insert(*index.second, vertex->properties_.at(property), vlist, vertex);
  }
}
/**
 * @brief - Returns every vlist stored under `key` whose record still has the
 * label and the indexed property value visible in the given transaction.
 * @param key - Label+Property to query.
 * @param t - current transaction, which determines visibility.
 * @param current_state If true then the graph state for the
 * current transaction+command is returned (insertions, updates and
 * deletions performed in the current transaction+command are not
 * ignored).
 * @return iterable collection of vlists of vertex records with the requested
 * key sorted ascendingly by the property value.
 */
auto GetVlists(const Key &key, const tx::Transaction &t, bool current_state) {
  DCHECK(ready_for_use_.access().contains(key)) << "Index not yet ready.";
  auto accessor = GetKeyStorage(key)->access();
  // The start iterator is obtained before the accessor is moved into the
  // call below.
  auto first = accessor.begin();
  return index::GetVlists<typename SkipList<IndexEntry>::Iterator, IndexEntry,
                          Vertex, SkipList<IndexEntry>>(
      std::move(accessor), first,
      // No value filter: every entry of the index is a candidate.
      [](const IndexEntry &) { return true; }, t,
      // An entry is only reported if the record still matches the key and
      // the value stored in the entry.
      [key](const IndexEntry &candidate, const Vertex *const record) {
        return LabelPropertyIndex::Exists(key, candidate.value_, record);
      },
      current_state);
}
/**
 * @brief - Returns every vlist stored under `key` whose property value is
 * equal to `value` and whose record still has the label and that property
 * value visible in the given transaction.
 * @param key - Label+Property to query.
 * @param value - vlists with this value will be returned
 * @param t - current transaction, which determines visibility.
 * @param current_state If true then the graph state for the
 * current transaction+command is returned (insertions, updates and
 * deletions performed in the current transaction+command are not
 * ignored).
 * @return iterable collection of vlists of vertex records with the requested
 * key and value
 */
auto GetVlists(const Key &key, const PropertyValue &value,
               const tx::Transaction &t, bool current_state) {
  DCHECK(ready_for_use_.access().contains(key)) << "Index not yet ready.";
  auto accessor = GetKeyStorage(key)->access();
  // IndexEntry breaks value ties by comparing the vlist and record pointers,
  // so a probe carrying the smallest pointer value sorts before every stored
  // entry that holds `value`.
  auto lowest_ptr = std::numeric_limits<std::uintptr_t>::min();
  auto first = accessor.find_or_larger(IndexEntry(
      value, reinterpret_cast<mvcc::VersionList<Vertex> *>(lowest_ptr),
      reinterpret_cast<const Vertex *>(lowest_ptr)));
  return index::GetVlists<typename SkipList<IndexEntry>::Iterator, IndexEntry,
                          Vertex>(
      std::move(accessor), first,
      // Accept an entry only when neither value orders before the other,
      // i.e. the entry's value is equivalent to the requested one.
      [value](const IndexEntry &candidate) {
        return !(IndexEntry::Less(value, candidate.value_) ||
                 IndexEntry::Less(candidate.value_, value));
      },
      t,
      [key](const IndexEntry &candidate, const Vertex *const record) {
        return LabelPropertyIndex::Exists(key, candidate.value_, record);
      },
      current_state);
}
/**
 * Get an iterable over all mvcc::VersionLists that are contained in this
 * index and satisfy the given bounds.
 *
 * The returned iterator will only contain vertices/edges whose property value
 * is comparable with the given bounds (w.r.t. type). This has implications on
 * Cypher query execution semantics which have not been resolved yet.
 *
 * At least one of the bounds must be specified. Bounds can't be @c
 * PropertyValue::Null. If both bounds are specified, their PropertyValue
 * elements must be of comparable types; otherwise nothing is returned.
 *
 * @param key - Label+Property to query.
 * @param lower - Lower bound of the interval.
 * @param upper - Upper bound of the interval.
 * @param transaction - current transaction, which determines visibility.
 * @param current_state If true then the graph state for the
 * current transaction+command is returned (insertions, updates and
 * deletions performed in the current transaction+command are not
 * ignored).
 * @return iterable collection of mvcc:VersionLists pointers that
 * satisfy the bounds and are visible to the given transaction.
 */
auto GetVlists(
    const Key &key,
    const std::experimental::optional<utils::Bound<PropertyValue>> lower,
    const std::experimental::optional<utils::Bound<PropertyValue>> upper,
    const tx::Transaction &transaction, bool current_state) {
  DCHECK(ready_for_use_.access().contains(key)) << "Index not yet ready.";
  auto type = [](const auto &bound) { return bound.value().value().type(); };
  CHECK(lower || upper) << "At least one bound must be provided";
  CHECK(!lower || type(lower) != PropertyValue::Type::Null)
      << "Null value is not a valid index bound";
  CHECK(!upper || type(upper) != PropertyValue::Type::Null)
      << "Null value is not a valid index bound";

  // Helper function for creating a bound as an IndexEntry. IndexEntry breaks
  // value ties by comparing the vlist and record pointers, so the smallest
  // pointer value (`bottom == true`) yields an entry ordered before all
  // stored entries with the same value, and the largest pointer value yields
  // one ordered after them.
  auto make_index_bound = [](const auto &optional_bound, bool bottom) {
    std::uintptr_t ptr_bound =
        bottom ? std::numeric_limits<std::uintptr_t>::min()
               : std::numeric_limits<std::uintptr_t>::max();
    return IndexEntry(
        optional_bound.value().value(),
        reinterpret_cast<mvcc::VersionList<Vertex> *>(ptr_bound),
        reinterpret_cast<const Vertex *>(ptr_bound));
  };

  auto access = GetKeyStorage(key)->access();

  // Create the iterator start point based on the lower bound. An inclusive
  // bound uses the "bottom" sentinel so that entries equal to the bound value
  // are not skipped; an exclusive bound uses the "top" sentinel so that they
  // are.
  auto start_iter = lower
                        ? access.find_or_larger(make_index_bound(
                              lower, lower.value().IsInclusive()))
                        : access.begin();

  // A function that defines if an entry satisfies the filtering predicate.
  // Since we already handled the lower bound, we only need to deal with the
  // upper bound and value type.
  std::function<bool(const IndexEntry &entry)> predicate;
  if (lower && upper &&
      !PropertyValue::AreComparableTypes(type(lower), type(upper))) {
    // Bounds of incomparable types can't be satisfied by any entry.
    predicate = [](const IndexEntry &) { return false; };
  } else if (upper) {
    // An exclusive upper bound uses the "bottom" sentinel so that entries
    // equal to the bound value fail `entry < bound`; an inclusive one uses
    // the "top" sentinel so that they pass.
    auto upper_index_entry =
        make_index_bound(upper, upper.value().IsExclusive());
    predicate = [upper_index_entry](const IndexEntry &entry) {
      return PropertyValue::AreComparableTypes(
                 entry.value_.type(), upper_index_entry.value_.type()) &&
             entry < upper_index_entry;
    };
  } else {
    // Only the lower bound is given: accept every entry whose value type is
    // comparable with the bound's type.
    auto lower_type = type(lower);
    predicate = [lower_type](const IndexEntry &entry) {
      return PropertyValue::AreComparableTypes(entry.value_.type(),
                                               lower_type);
    };
  }

  return index::GetVlists<typename SkipList<IndexEntry>::Iterator, IndexEntry,
                          Vertex>(
      std::move(access), start_iter, predicate, transaction,
      [key](const IndexEntry &entry, const Vertex *const vertex) {
        return LabelPropertyIndex::Exists(key, entry.value_, vertex);
      },
      current_state);
}
/**
 * @brief - Check for existence of a usable index. The lookup is done in the
 * set of keys that were marked as ready through IndexFinishedBuilding.
 * @param key - Index key
 * @return true if the index with that key exists
 */
bool IndexExists(const Key &key) {
  auto ready = ready_for_use_.access();
  return ready.contains(key);
}
/**
 * @brief - Return number of items in the skiplist associated with the given
 * key. This number could be imprecise because of the underlying skiplist
 * storage. Use this as a hint, and not as a rule. Fails if the index doesn't
 * exist or isn't ready for use yet.
 * Moreover, some transaction probably sees only part of the skiplist since
 * not all versions are visible for it. Also, garbage collection might not
 * have been run for some time so the index might have accumulated garbage.
 * @param key - key to query for.
 * @return number of items
 */
int64_t Count(const Key &key) {
  auto *storage = GetKeyStorage(key);
  CHECK(storage != nullptr) << "Index doesn't exist.";
  CHECK(ready_for_use_.access().contains(key)) << "Index not yet ready.";
  auto accessor = storage->access();
  return accessor.size();
}
/**
* Returns the approximate position and count of the given value in the
* index for the given Key.
*
* Both are approximations for several reasons. Initially the position
* and count are obtained from the skipist (the index) and as such are
* not exact for perfromance reasons. At the same time the position
* and count are calculated based on property value comparison: an
* additional error is accumulated because the index could contain
* the same vertex with the same value multiple times,
* as well as the same vertex with different values.
*/
auto PositionAndCount(const Key &key, const PropertyValue &value) {
auto access = GetKeyStorage(key)->access();
return access.position_and_count(
value,
// the 'less' function
[](const PropertyValue &a, const IndexEntry &b) {
return IndexEntry::Less(a, b.value_);
},
// the 'equal_to' function
[](const PropertyValue &a, const IndexEntry &b) {
return !(IndexEntry::Less(a, b.value_) ||
IndexEntry::Less(b.value_, a));
});
}
/**
 * @brief - Cleans up the index by delegating to index::Refresh. According to
 * the original contract this removes all entries for which records don't
 * match the index key anymore, or the record was deleted before this
 * transaction id.
 *
 * @param snapshot - the GC snapshot. Consists of the oldest active
 * transaction's snapshot, with that transaction's id appended as last.
 * @param engine - transaction engine, forwarded to index::Refresh.
 */
void Refresh(const tx::Snapshot &snapshot, tx::Engine &engine) {
  index::Refresh<Key, IndexEntry, Vertex>(
      indices_, snapshot, engine,
      // Tells index::Refresh whether the entry's record still carries the
      // key's label and the property value stored in the entry.
      [](const Key &index_key, const IndexEntry &candidate) {
        return LabelPropertyIndex::Exists(index_key, candidate.value_,
                                          candidate.record_);
      });
}
/**
* Returns a vector of keys present in this index.
*/
std::vector<Key> Keys() {
std::vector<Key> keys;
for (auto &kv : indices_.access()) keys.push_back(kv.first);
return keys;
}
private:
/**
 * @brief - Contains value, vlist and vertex record to distinguish between
 * index entries.
 */
class IndexEntry : public TotalOrdering<IndexEntry> {
 public:
  /// Creates an entry with the value and vlist of `entry`, but pointing to
  /// `new_record`.
  IndexEntry(const IndexEntry &entry, const Vertex *new_record)
      : IndexEntry(entry.value_, entry.vlist_, new_record) {}

  IndexEntry(const PropertyValue &value, mvcc::VersionList<Vertex> *vlist,
             const Vertex *record)
      : value_(value), vlist_(vlist), record_(record) {}

  // Comparison operators - we need them to keep this sorted inside
  // skiplist. Entries are ordered by value first; ties are broken by the
  // vlist pointer and then by the record pointer.
  bool operator<(const IndexEntry &other) const {
    bool this_value_smaller = Less(this->value_, other.value_);
    if (this_value_smaller || Less(other.value_, this->value_))
      return this_value_smaller;
    if (this->vlist_ != other.vlist_) return this->vlist_ < other.vlist_;
    return this->record_ < other.record_;
  }

  bool operator==(const IndexEntry &other) const {
    return !(*this < other) && !(other < *this);
  }

  /**
   * @brief - For two property values - orders the records by type and then by
   * value. Except for integers and doubles - those are both converted to
   * double and then compared. Lists and maps are ordered by size first and
   * element-wise (lexicographically) when the sizes match.
   * @return true if the first property value is smaller (should be before)
   * than the second one
   */
  static bool Less(const PropertyValue &a, const PropertyValue &b) {
    if (!PropertyValue::AreComparableTypes(a.type(), b.type()))
      return a.type() < b.type();

    if (a.type() == b.type()) {
      switch (a.type()) {
        case PropertyValue::Type::Null:
          return false;
        case PropertyValue::Type::String:
          return a.Value<std::string>() < b.Value<std::string>();
        case PropertyValue::Type::Bool:
          return a.Value<bool>() < b.Value<bool>();
        case PropertyValue::Type::Int:
          return a.Value<int64_t>() < b.Value<int64_t>();
        case PropertyValue::Type::Double:
          return a.Value<double>() < b.Value<double>();
        case PropertyValue::Type::List: {
          // Bind by const reference: this runs on every skiplist comparison
          // and copying both containers each time is pure overhead.
          const auto &va = a.Value<std::vector<PropertyValue>>();
          const auto &vb = b.Value<std::vector<PropertyValue>>();
          if (va.size() != vb.size()) return va.size() < vb.size();
          return std::lexicographical_compare(va.begin(), va.end(),
                                              vb.begin(), vb.end(), Less);
        }
        case PropertyValue::Type::Map: {
          const auto &ma = a.Value<std::map<std::string, PropertyValue>>();
          const auto &mb = b.Value<std::map<std::string, PropertyValue>>();
          if (ma.size() != mb.size()) return ma.size() < mb.size();
          // Compare (key, value) pairs: by key first, then by value.
          const auto cmp = [](const auto &lhs, const auto &rhs) {
            if (lhs.first != rhs.first) return lhs.first < rhs.first;
            return Less(lhs.second, rhs.second);
          };
          return std::lexicographical_compare(ma.begin(), ma.end(),
                                              mb.begin(), mb.end(), cmp);
        }
      }
    }

    // helper for getting a double from PropertyValue, if possible
    auto get_double = [](const PropertyValue &value) {
      DCHECK(value.type() == PropertyValue::Type::Int ||
             value.type() == PropertyValue::Type::Double)
          << "Invalid data type.";
      if (value.type() == PropertyValue::Type::Int)
        return static_cast<double>(value.Value<int64_t>());
      return value.Value<double>();
    };

    // Types are int and double - convert int to double
    return get_double(a) < get_double(b);
  }

  /**
   * @brief - Check if previous IndexEntry represents the same vlist/value
   * pair.
   * @return - true if IndexEntries are equal by the vlist/value pair.
   */
  bool IsAlreadyChecked(const IndexEntry &previous) const {
    return previous.vlist_ == this->vlist_ &&
           !Less(previous.value_, this->value_) &&
           !Less(this->value_, previous.value_);
  }

  const PropertyValue value_;
  mvcc::VersionList<Vertex> *const vlist_{nullptr};
  const Vertex *const record_{nullptr};
};
/**
 * @brief - Adds a new (value, vlist, vertex) entry to the given index
 * storage.
 * @param index - into which index to add
 * @param value - value which to add
 * @param vlist - pointer to vlist entry to add
 * @param vertex - pointer to vertex record entry to add (contained in
 * vlist)
 */
void Insert(SkipList<IndexEntry> &index, const PropertyValue &value,
            mvcc::VersionList<Vertex> *const vlist,
            const Vertex *const vertex) {
  auto accessor = index.access();
  accessor.insert(IndexEntry(value, vlist, vertex));
}
/**
 * @brief - Looks up the skiplist that stores the entries for this key.
 * @param key - Label and property for which to query.
 * @return pointer to skiplist of IndexEntries, or nullptr if no index is
 * registered under the key.
 */
SkipList<IndexEntry> *GetKeyStorage(const Key &key) {
  auto accessor = indices_.access();
  auto found = accessor.find(key);
  return found == accessor.end() ? nullptr : found->second;
}
/**
 * @brief - Check if Vertex contains label and property with the given
 * value.
 * @param key - label and property to check for.
 * @param value - value of property to compare
 * @param v - vertex record to inspect, must not be nullptr.
 * @return true if it contains, false otherwise.
 */
static bool Exists(const Key &key, const PropertyValue &value,
                   const Vertex *const v) {
  DCHECK(v != nullptr) << "Vertex is nullptr.";
  // We have to check for existence of label because the transaction
  // might not see the label, or the label was deleted and not yet
  // removed from the index.
  const auto &labels = v->labels_;
  if (std::find(labels.begin(), labels.end(), key.label_) == labels.end())
    return false;
  // Bind by const reference: this check is run for individual index entries
  // and the value is only read, so copying it would be wasted work.
  const auto &prop = v->properties_.at(key.property_);
  // Property doesn't exist.
  if (prop.type() == PropertyValue::Type::Null) return false;
  // Property value is the same as expected (neither orders before the other).
  return !IndexEntry::Less(prop, value) && !IndexEntry::Less(value, prop);
}
// Maps every (label, property) key to the skiplist holding its entries. The
// skiplists are allocated in CreateIndex and freed in the destructor.
ConcurrentMap<Key, SkipList<IndexEntry> *> indices_;
// Keys that were reported as fully built through IndexFinishedBuilding.
ConcurrentSet<Key> ready_for_use_;
};
} // namespace database