diff --git a/src/data_structures/concurrent/skiplist.hpp b/src/data_structures/concurrent/skiplist.hpp index d52267b8c..464397fa6 100644 --- a/src/data_structures/concurrent/skiplist.hpp +++ b/src/data_structures/concurrent/skiplist.hpp @@ -548,19 +548,24 @@ class SkipList : private Lockable<lock_t> { * if `less` indicates that X is less than * Y, then natural comparison must indicate the same. The * reverse does not have to hold. + * @param equal Comparison function, analogous to less. * @param position_level_reduction - Defines at which level * item position is estimated. Position level is defined * as log2(skiplist->size()) - position_level_reduction. * @param count_max_level - Defines the max level at which * item count is estimated. - * @tparam TLess Type of `less` + * @tparam TItem - type of item skiplist elements are compared + * to. Does not have to be the same type as skiplist element. + * @tparam TLess - less comparison function type. + * @tparam TEqual - equality comparison function type. * @return A pair of ints where the first element is the estimated * position of item, and the second is the estimated number * of items that are the same according to `less`. 
*/ - template <typename TItem, typename TLess = std::less<T>> - std::pair<size_t, size_t> position_and_count( - const TItem &item, TLess less = TLess{}, + template <typename TItem, typename TLess = std::less<T>, + typename TEqual = std::equal_to<T>> + std::pair<int64_t, int64_t> position_and_count( + const TItem &item, TLess less = TLess(), TEqual equal = TEqual(), int position_level_reduction = 10, int count_max_level = 3) { // the level at which position will be sought int position_level = std::max( @@ -579,9 +584,10 @@ class SkipList : private Lockable<lock_t> { // on the current height (i) find the last tower // whose value is lesser than item, store it in pred // while succ will be either skiplist end or the - // first element greater or equal to item + // first element greater than or equal to item succ = pred->forward(i); - while (succ && less(succ->value(), item)) { + while (succ && + !(less(item, succ->value()) || equal(item, succ->value()))) { pred = succ; succ = succ->forward(i); tower_count++; diff --git a/src/database/graph_db_accessor.cpp b/src/database/graph_db_accessor.cpp index c5184f935..c282c38da 100644 --- a/src/database/graph_db_accessor.cpp +++ b/src/database/graph_db_accessor.cpp @@ -60,15 +60,16 @@ void GraphDbAccessor::update_property_index( property, record_accessor.vlist_, vertex); } -size_t GraphDbAccessor::vertices_count() const { +int64_t GraphDbAccessor::vertices_count() const { return db_.vertices_.access().size(); } -size_t GraphDbAccessor::vertices_count(const GraphDbTypes::Label &label) const { +int64_t GraphDbAccessor::vertices_count( + const GraphDbTypes::Label &label) const { return db_.labels_index_.Count(label); } -size_t GraphDbAccessor::vertices_count( +int64_t GraphDbAccessor::vertices_count( const GraphDbTypes::Label &label, const GraphDbTypes::Property &property) const { const LabelPropertyIndex::Key key(label, property); @@ -77,6 +78,51 @@ size_t GraphDbAccessor::vertices_count( return db_.label_property_index_.Count(key); } 
+int64_t GraphDbAccessor::vertices_count(const GraphDbTypes::Label &label, + const GraphDbTypes::Property &property, + const PropertyValue &value) const { + const LabelPropertyIndex::Key key(label, property); + debug_assert(db_.label_property_index_.IndexExists(key), + "Index doesn't exist."); + return db_.label_property_index_.PositionAndCount(key, value).second; +} + +int64_t GraphDbAccessor::vertices_count( + const GraphDbTypes::Label &label, const GraphDbTypes::Property &property, + const std::experimental::optional<utils::Bound<PropertyValue>> lower, + const std::experimental::optional<utils::Bound<PropertyValue>> upper) + const { + const LabelPropertyIndex::Key key(label, property); + debug_assert(db_.label_property_index_.IndexExists(key), + "Index doesn't exist."); + debug_assert(lower || upper, "At least one bound must be provided"); + + if (!upper) { + auto lower_pac = + db_.label_property_index_.PositionAndCount(key, lower.value().value()); + int64_t size = db_.label_property_index_.Count(key); + return std::max(0l, + size - lower_pac.first - + (lower.value().IsInclusive() ? 0l : lower_pac.second)); + + } else if (!lower) { + auto upper_pac = + db_.label_property_index_.PositionAndCount(key, upper.value().value()); + return upper.value().IsInclusive() ? 
upper_pac.first + upper_pac.second + : upper_pac.first; + + } else { + auto lower_pac = + db_.label_property_index_.PositionAndCount(key, lower.value().value()); + auto upper_pac = + db_.label_property_index_.PositionAndCount(key, upper.value().value()); + auto result = upper_pac.first - lower_pac.first; + if (lower.value().IsExclusive()) result -= lower_pac.second; + if (upper.value().IsInclusive()) result += upper_pac.second; + return std::max(0l, result); + } +} + bool GraphDbAccessor::remove_vertex(VertexAccessor &vertex_accessor) { vertex_accessor.SwitchNew(); // it's possible the vertex was removed already in this transaction @@ -133,11 +179,11 @@ void GraphDbAccessor::update_edge_type_index( this->db_.edge_types_index_.Update(edge_type, edge_accessor.vlist_, edge); } -size_t GraphDbAccessor::edges_count() const { +int64_t GraphDbAccessor::edges_count() const { return db_.edges_.access().size(); } -size_t GraphDbAccessor::edges_count( +int64_t GraphDbAccessor::edges_count( const GraphDbTypes::EdgeType &edge_type) const { return db_.edge_types_index_.Count(edge_type); } diff --git a/src/database/graph_db_accessor.hpp b/src/database/graph_db_accessor.hpp index 333a5ab70..ec87fddd5 100644 --- a/src/database/graph_db_accessor.hpp +++ b/src/database/graph_db_accessor.hpp @@ -5,14 +5,16 @@ #pragma once +#include <experimental/optional> + #include "cppitertools/filter.hpp" #include "cppitertools/imap.hpp" #include "graph_db.hpp" -#include "transactions/transaction.hpp" - #include "storage/edge_accessor.hpp" #include "storage/vertex_accessor.hpp" +#include "transactions/transaction.hpp" +#include "utils/bound.hpp" /** Thrown when creating an index which already exists. */ class IndexExistsException : public utils::BasicException { @@ -310,13 +312,13 @@ class GraphDbAccessor { * Return approximate number of all vertices in the database. * Note that this is always an over-estimate and never an under-estimate. 
*/ - size_t vertices_count() const; + int64_t vertices_count() const; /* * Return approximate number of all edges in the database. * Note that this is always an over-estimate and never an under-estimate. */ - size_t edges_count() const; + int64_t edges_count() const; /** * Return approximate number of vertices under indexes with the given label. @@ -324,7 +326,7 @@ class GraphDbAccessor { * @param label - label to check for * @return number of vertices with the given label */ - size_t vertices_count(const GraphDbTypes::Label &label) const; + int64_t vertices_count(const GraphDbTypes::Label &label) const; /** * Return approximate number of vertices under indexes with the given label @@ -335,8 +337,32 @@ class GraphDbAccessor { * @return number of vertices with the given label, fails if no such * label+property index exists. */ - size_t vertices_count(const GraphDbTypes::Label &label, - const GraphDbTypes::Property &property) const; + int64_t vertices_count(const GraphDbTypes::Label &label, + const GraphDbTypes::Property &property) const; + + /** + * Returns approximate number of vertices that have the given label + * and the given value for the given property. + * + * Assumes that an index for that (label, property) exists. + */ + int64_t vertices_count(const GraphDbTypes::Label &label, + const GraphDbTypes::Property &property, + const PropertyValue &value) const; + + /** + * Returns approximate number of vertices that have the given label + * and whose value is in the range defined by upper and lower @c Bound. + * At least one bound must be specified. If lower bound is not specified, + * the whole upper bound prefix is returned. + * + * Assumes that an index for that (label, property) exists. 
+ */ + int64_t vertices_count( + const GraphDbTypes::Label &label, const GraphDbTypes::Property &property, + const std::experimental::optional<utils::Bound<PropertyValue>> lower, + const std::experimental::optional<utils::Bound<PropertyValue>> upper) + const; /** * Return approximate number of edges under indexes with the given edge_type. @@ -344,7 +370,7 @@ class GraphDbAccessor { * @param edge_type - edge_type to check for * @return number of edges with the given edge_type */ - size_t edges_count(const GraphDbTypes::EdgeType &edge_type) const; + int64_t edges_count(const GraphDbTypes::EdgeType &edge_type) const; /** * Obtains the Label for the label's name. diff --git a/src/database/indexes/label_property_index.hpp b/src/database/indexes/label_property_index.hpp index ca8c1849f..9ab8be190 100644 --- a/src/database/indexes/label_property_index.hpp +++ b/src/database/indexes/label_property_index.hpp @@ -202,8 +202,8 @@ class LabelPropertyIndex { IndexEntry, Vertex>( std::move(access), start_iter, [value](const IndexEntry &entry) { - return !IndexEntry::Cmp(value, entry.value_) && - !IndexEntry::Cmp(entry.value_, value); + return !IndexEntry::Less(value, entry.value_) && + !IndexEntry::Less(entry.value_, value); }, t, [key](const IndexEntry &entry, const Vertex *const vertex) { @@ -232,13 +232,40 @@ class LabelPropertyIndex { * @param key - key to query for. * @return number of items */ - size_t Count(const Key &key) { + int64_t Count(const Key &key) { auto index = GetKeyStorage(key); permanent_assert(index != nullptr, "Index doesn't exist."); debug_assert(ready_for_use_.access().contains(key), "Index not yet ready."); return index->access().size(); } + /** + * Returns the approximate position and count of the given value in the + * index for the given Key. + * + * Both are approximations for several reasons. Initially the position + * and count are obtained from the skiplist (the index) and as such are + * not exact for performance reasons. 
At the same time the position + * and count are calculated based on property value comparison: an + * additional error is accumulated because the index could contain + * the same vertex with the same value multiple times, + * as well as the same vertex with different values. + */ + auto PositionAndCount(const Key &key, const PropertyValue &value) { + auto access = GetKeyStorage(key)->access(); + return access.position_and_count( + value, + // the 'less' function + [](const PropertyValue &a, const IndexEntry &b) { + return IndexEntry::Less(a, b.value_); + }, + // the 'equal_to' function + [](const PropertyValue &a, const IndexEntry &b) { + return !(IndexEntry::Less(a, b.value_) || + IndexEntry::Less(b.value_, a)); + }); + } + /** * @brief - Removes from the index all entries for which records don't contain * the given label anymore, or the record was deleted before this transaction @@ -249,7 +276,8 @@ class LabelPropertyIndex { */ void Refresh(const tx::Snapshot &snapshot, tx::Engine &engine) { return IndexUtils::Refresh<Key, IndexEntry, Vertex>( - indices_, snapshot, engine, [](const Key &key, const IndexEntry &entry) { + indices_, snapshot, engine, + [](const Key &key, const IndexEntry &entry) { return LabelPropertyIndex::Exists(key, entry.value_, entry.record_); }); } @@ -270,8 +298,8 @@ class LabelPropertyIndex { // Comparision operators - we need them to keep this sorted inside // skiplist. 
bool operator<(const IndexEntry &other) const { - bool this_value_smaller = Cmp(this->value_, other.value_); - if (this_value_smaller || Cmp(other.value_, this->value_)) + bool this_value_smaller = Less(this->value_, other.value_); + if (this_value_smaller || Less(other.value_, this->value_)) return this_value_smaller; if (this->vlist_ != other.vlist_) return this->vlist_ < other.vlist_; return this->record_ < other.record_; @@ -288,7 +316,7 @@ class LabelPropertyIndex { * @return true if the first property value is smaller( should be before) * than the second one */ - static bool Cmp(const PropertyValue &a, const PropertyValue &b) { + static bool Less(const PropertyValue &a, const PropertyValue &b) { if (a.type() != b.type() && !(IsCastableToDouble(a) && IsCastableToDouble(b))) return a.type() < b.type(); @@ -310,7 +338,7 @@ class LabelPropertyIndex { auto vb = b.Value<std::vector<PropertyValue>>(); if (va.size() != vb.size()) return va.size() < vb.size(); return lexicographical_compare(va.begin(), va.end(), vb.begin(), - vb.end(), Cmp); + vb.end(), Less); } default: permanent_fail("Unimplemented type operator."); @@ -350,8 +378,8 @@ class LabelPropertyIndex { */ bool IsAlreadyChecked(const IndexEntry &previous) const { return previous.vlist_ == this->vlist_ && - !Cmp(previous.value_, this->value_) && - !Cmp(this->value_, previous.value_); + !Less(previous.value_, this->value_) && + !Less(this->value_, previous.value_); } const PropertyValue value_; @@ -406,7 +434,7 @@ class LabelPropertyIndex { // Property doesn't exists. if (prop.type() == PropertyValue::Type::Null) return false; // Property value is the same as expected. 
- return !IndexEntry::Cmp(prop, value) && !IndexEntry::Cmp(value, prop); + return !IndexEntry::Less(prop, value) && !IndexEntry::Less(value, prop); } ConcurrentMap<Key, SkipList<IndexEntry> *> indices_; diff --git a/src/utils/bound.hpp b/src/utils/bound.hpp index 7ef896f12..b4a247d30 100644 --- a/src/utils/bound.hpp +++ b/src/utils/bound.hpp @@ -26,10 +26,34 @@ class Bound { const auto &value() const { return value_; } /** Whether the bound is inclusive or exclusive. */ auto type() const { return type_; } + auto IsInclusive() const { return type_ == BoundType::INCLUSIVE; } + auto IsExclusive() const { return type_ == BoundType::EXCLUSIVE; } private: TValue value_; Type type_; }; +/** + * Creates an inclusive @c Bound. + * + * @param value - Bound value + * @tparam TValue - value type + */ +template <typename TValue> +Bound<TValue> MakeBoundInclusive(TValue value) { + return Bound<TValue>(value, BoundType::INCLUSIVE); +}; + +/** + * Creates an exclusive @c Bound. + * + * @param value - Bound value + * @tparam TValue - value type + */ +template <typename TValue> +Bound<TValue> MakeBoundExclusive(TValue value) { + return Bound<TValue>(value, BoundType::EXCLUSIVE); +}; + } // namespace utils diff --git a/tests/manual/sl_position_and_count.cpp b/tests/manual/sl_position_and_count.cpp index b0e4a3fee..b5a488731 100644 --- a/tests/manual/sl_position_and_count.cpp +++ b/tests/manual/sl_position_and_count.cpp @@ -38,15 +38,19 @@ std::unique_ptr<SkipList<int>> make_sl(int size) { * * @param size - size of the skiplist to test with * @param iterations - number of iterations of each test. - * @param granulation - How many sequential ints should be + * @param granularity - How many sequential ints should be * considered equal in testing by the custom `less` * function. 
*/ -void test(int size, int iterations = 20, int granulation = 1) { - auto less = [granulation](const int &a, const int &b) { - return a / granulation < b / granulation; +void test(int size, int iterations = 20, int granularity = 1) { + auto less = [granularity](const int &a, const int &b) { + return a / granularity < b / granularity; }; - log("\nTesting skiplist size {} with granulation {}", size, granulation); + + auto equal = [granularity](const int &a, const int &b) { + return a / granularity == b / granularity; + }; + log("\nTesting skiplist size {} with granularity {}", size, granularity); // test at 1/4, 1/2 and 3/4 points std::vector<int> test_positions({size / 4, size / 2, size * 3 / 4}); @@ -60,7 +64,7 @@ void test(int size, int iterations = 20, int granulation = 1) { for (auto pos : {0, 1, 2}) { clock_t start_time = clock(); auto pos_and_count = - sl->access().position_and_count(test_positions[pos], less); + sl->access().position_and_count(test_positions[pos], less, equal); auto t = double(clock() - start_time) / CLOCKS_PER_SEC; position[pos].push_back(pos_and_count.first); @@ -77,7 +81,7 @@ void test(int size, int iterations = 20, int granulation = 1) { position_elem = std::abs(position_elem - test_position); log("\t\tMean position error: {}", mean(position[pos_index])); for (auto &count_elem : count[pos_index]) - count_elem = std::abs(count_elem - granulation); + count_elem = std::abs(count_elem - granularity); log("\t\tMean count error: {}", mean(count[pos_index])); log("\t\tMean time (ms): {}", mean(time[pos_index]) * 1000); } @@ -91,9 +95,9 @@ int main(int argc, char *argv[]) { if (argc > 1) size = (int)std::stoi(argv[1]); if (argc > 2) iterations = (int)std::stoi(argv[2]); - std::vector<int> granulations; - for (int i = 1; i < size; i *= 100) granulations.push_back(i); - for (auto granulation : granulations) test(size, iterations, granulation); + std::vector<int> granularitys; + for (int i = 1; i < size; i *= 100) granularitys.push_back(i); + for 
(auto granularity : granularitys) test(size, iterations, granularity); return 0; } diff --git a/tests/unit/graph_db_accessor_index_api.cpp b/tests/unit/graph_db_accessor_index_api.cpp index 98a21662a..e1d59c110 100644 --- a/tests/unit/graph_db_accessor_index_api.cpp +++ b/tests/unit/graph_db_accessor_index_api.cpp @@ -1,3 +1,4 @@ +#include <experimental/optional> #include <memory> #include <gmock/gmock.h> @@ -6,6 +7,7 @@ #include "data_structures/ptr_int.hpp" #include "database/graph_db_accessor.hpp" #include "dbms/dbms.hpp" +#include "utils/bound.hpp" using testing::UnorderedElementsAreArray; @@ -82,6 +84,67 @@ TEST(GraphDbAccessor, VertexByLabelPropertyCount) { EXPECT_EQ(dba->vertices_count(), 14 + 15 + 16 + 17); } +#define EXPECT_WITH_MARGIN(x, center) \ + EXPECT_THAT( \ + x, testing::AllOf(testing::Ge(center - 2), testing::Le(center + 2))); + +TEST(GraphDbAccessor, VertexByLabelPropertyValueCount) { + Dbms dbms; + auto dba = dbms.active(); + auto label = dba->label("label"); + auto property = dba->property("property"); + dba->BuildIndex(label, property); + + // add some vertices without the property + for (int i = 0; i < 20; i++) dba->insert_vertex(); + + // add vertices with prop values [0, 30), ten vertices for each value + for (int i = 0; i < 300; i++) { + auto vertex = dba->insert_vertex(); + vertex.add_label(label); + vertex.PropsSet(property, i / 10); + } + // add vertices in the [30, 40) range, 100 vertices for each value + for (int i = 0; i < 1000; i++) { + auto vertex = dba->insert_vertex(); + vertex.add_label(label); + vertex.PropsSet(property, 30 + i / 100); + } + + // test estimates for exact value count + EXPECT_WITH_MARGIN(dba->vertices_count(label, property, 10), 10); + EXPECT_WITH_MARGIN(dba->vertices_count(label, property, 14), 10); + EXPECT_WITH_MARGIN(dba->vertices_count(label, property, 30), 100); + EXPECT_WITH_MARGIN(dba->vertices_count(label, property, 39), 100); + EXPECT_EQ(dba->vertices_count(label, property, 40), 0); + + // helper 
functions + auto Inclusive = [](int64_t value) { + return std::experimental::make_optional( + utils::MakeBoundInclusive(PropertyValue(value))); + }; + auto Exclusive = [](int64_t value) { + return std::experimental::make_optional( + utils::MakeBoundExclusive(PropertyValue(value))); + }; + auto Count = [&dba, label, property](auto lower, auto upper) { + return dba->vertices_count(label, property, lower, upper); + }; + + using std::experimental::nullopt; + EXPECT_DEATH(Count(nullopt, nullopt), "bound must be provided"); + EXPECT_WITH_MARGIN(Count(nullopt, Exclusive(4)), 40); + EXPECT_WITH_MARGIN(Count(nullopt, Inclusive(4)), 50); + EXPECT_WITH_MARGIN(Count(Exclusive(13), nullopt), 160 + 1000); + EXPECT_WITH_MARGIN(Count(Inclusive(13), nullopt), 170 + 1000); + EXPECT_WITH_MARGIN(Count(Inclusive(13), Exclusive(14)), 10); + EXPECT_WITH_MARGIN(Count(Exclusive(13), Inclusive(14)), 10); + EXPECT_WITH_MARGIN(Count(Exclusive(13), Exclusive(13)), 0); + EXPECT_WITH_MARGIN(Count(Inclusive(20), Exclusive(13)), 0); +} + +#undef EXPECT_WITH_MARGIN + TEST(GraphDbAccessor, EdgeByEdgeTypeCount) { Dbms dbms; auto dba = dbms.active(); @@ -166,7 +229,8 @@ TEST(GraphDbAccessor, FilterLabelPropertySpecificValue) { i); } -// Inserts integers, double, lists, booleans into index and check if they are +// Inserts integers, double, lists, booleans into index and check if they +// are // sorted as they should be sorted. TEST(GraphDbAccessor, SortedLabelPropertyEntries) { Dbms dbms; @@ -212,7 +276,8 @@ TEST(GraphDbAccessor, SortedLabelPropertyEntries) { expected_property_value[20 + 2 * i + 1] = vertex_accessor.PropsAt(property); } - // lists of ints - insert in reverse to check for comparision between lists. + // lists of ints - insert in reverse to check for comparision between + // lists. 
for (int i = 9; i >= 0; --i) { auto vertex_accessor = dba2->insert_vertex(); vertex_accessor.add_label(label); @@ -333,9 +398,3 @@ TEST(GraphDbAccessor, VisibilityAfterDeletion) { EXPECT_EQ(Count(dba->vertices(lab, false)), 3); EXPECT_EQ(Count(dba->vertices(lab, true)), 3); } - -int main(int argc, char **argv) { - google::InitGoogleLogging(argv[0]); - ::testing::InitGoogleTest(&argc, argv); - return RUN_ALL_TESTS(); -} diff --git a/tests/unit/skiplist_position_and_count.cpp b/tests/unit/skiplist_position_and_count.cpp index 36c0e1fc8..806294ea6 100644 --- a/tests/unit/skiplist_position_and_count.cpp +++ b/tests/unit/skiplist_position_and_count.cpp @@ -39,14 +39,20 @@ auto Less(int granularity) { }; } -#define EXPECT_ABS_POS_COUNT(granularity, position, expected_position, \ - expected_count) \ - { \ - auto sl = SkiplistRange(10000); \ - auto position_and_count = \ - sl->access().position_and_count(position, Less(granularity), 1000, 0); \ - EXPECT_EQ(position_and_count.first, expected_position); \ - EXPECT_EQ(position_and_count.second, expected_count); \ +auto Equal(int granularity) { + return [granularity](const int &a, const int &b) { + return a / granularity == b / granularity; + }; +} + +#define EXPECT_ABS_POS_COUNT(granularity, position, expected_position, \ + expected_count) \ + { \ + auto sl = SkiplistRange(10000); \ + auto position_and_count = sl->access().position_and_count( \ + position, Less(granularity), Equal(granularity), 1000, 0); \ + EXPECT_EQ(position_and_count.first, expected_position); \ + EXPECT_EQ(position_and_count.second, expected_count); \ } TEST(SkiplistPosAndCount, AbsoluteAccuracy) {