diff --git a/src/storage/v2/indices.cpp b/src/storage/v2/indices.cpp
index 78e3fc408..61ce7357a 100644
--- a/src/storage/v2/indices.cpp
+++ b/src/storage/v2/indices.cpp
@@ -572,6 +572,53 @@ LabelPropertyIndex::Iterable::Iterator LabelPropertyIndex::Iterable::end() {
   return Iterator(this, index_accessor_.end());
 }
 
+// A helper function for determining the skip list layer used for estimating the
+// number of elements in the label property index. The lower the layer we use,
+// the better the approximation we get (if we use the lowest layer, we get the
+// exact numbers). However, lower skip list layers contain more elements, so we
+// must iterate through more items to get the estimate.
+//
+// Our goal is to achieve a balance between execution time and approximation
+// precision. The expected number of elements at the k-th skip list layer is N *
+// (1/2)^(k-1), where N is the skip list size. We choose to iterate through no
+// more than sqrt(N) items for large N when calculating the estimate, so we need
+// to choose the skip list layer such that N * (1/2)^(k-1) <= sqrt(N). That is
+// equivalent to k >= 1 + 1/2 * log2(N), so we choose k to be 1 + ceil(log2(N) /
+// 2).
+//
+// For small N (at most 500, an arbitrarily chosen threshold), we just use the
+// lowest layer to get the exact numbers, mostly because this makes writing
+// tests easier.
+namespace {
+uint64_t SkipListLayerForEstimation(uint64_t N) {
+  if (N <= 500) return 1;
+  return std::min(1 + (utils::Log2(N) + 1) / 2, utils::kSkipListMaxHeight);
+}
+}  // namespace
+
+int64_t LabelPropertyIndex::ApproximateVertexCount(
+    LabelId label, PropertyId property, const PropertyValue &value) const {
+  auto it = index_.find({label, property});
+  CHECK(it != index_.end())
+      << "Index for label " << label.AsUint() << " and property "
+      << property.AsUint() << " doesn't exist";
+  auto acc = it->second.access();
+  return acc.estimate_count(value, SkipListLayerForEstimation(acc.size()));
+}
+
+int64_t LabelPropertyIndex::ApproximateVertexCount(
+    LabelId label, PropertyId property,
+    const std::optional<utils::Bound<PropertyValue>> &lower,
+    const std::optional<utils::Bound<PropertyValue>> &upper) const {
+  auto it = index_.find({label, property});
+  CHECK(it != index_.end())
+      << "Index for label " << label.AsUint() << " and property "
+      << property.AsUint() << " doesn't exist";
+  auto acc = it->second.access();
+  return acc.estimate_range_count(lower, upper,
+                                  SkipListLayerForEstimation(acc.size()));
+}
+
 void RemoveObsoleteEntries(Indices *indices,
                            uint64_t oldest_active_start_timestamp) {
   indices->label_index.RemoveObsoleteEntries(oldest_active_start_timestamp);
diff --git a/src/storage/v2/indices.hpp b/src/storage/v2/indices.hpp
index eaa80bbf1..ca8ed9869 100644
--- a/src/storage/v2/indices.hpp
+++ b/src/storage/v2/indices.hpp
@@ -92,6 +92,12 @@ class LabelIndex {
                     transaction, indices_);
   }
 
+  /// Approximate number of vertices with `label`: the size of the storage
+  /// returned by `GetOrCreateStorage(label)`.
+  int64_t ApproximateVertexCount(LabelId label) {
+    return GetOrCreateStorage(label)->size();
+  }
+
  private:
   utils::SkipList index_;
   Indices *indices_;
@@ -193,6 +199,28 @@ class LabelPropertyIndex {
                     upper_bound, view, transaction, indices_);
   }
 
+  /// Approximate number of vertices in the (label, property) index: the size
+  /// of its skip list. The index must exist, otherwise the CHECK fails.
+  int64_t ApproximateVertexCount(LabelId label, PropertyId property) const {
+    auto it = index_.find({label, property});
+    CHECK(it != index_.end())
+        << "Index for label " << label.AsUint() << " and property "
+        << property.AsUint() << " doesn't exist";
+    return it->second.size();
+  }
+
+  /// Estimated number of vertices in the (label, property) index whose
+  /// property equals `value`. The index must exist.
+  int64_t ApproximateVertexCount(LabelId label, PropertyId property,
+                                 const PropertyValue &value) const;
+
+  /// Estimated number of vertices in the (label, property) index whose
+  /// property lies within the `lower`/`upper` bounds. The index must exist.
+  int64_t ApproximateVertexCount(
+      LabelId label, PropertyId property,
+      const std::optional<utils::Bound<PropertyValue>> &lower,
+      const std::optional<utils::Bound<PropertyValue>> &upper) const;
+
  private:
   Indices *indices_;
   std::map, utils::SkipList> index_;
diff --git a/src/storage/v2/storage.hpp b/src/storage/v2/storage.hpp
index c87e361d8..31f3df7fe 100644
--- a/src/storage/v2/storage.hpp
+++ b/src/storage/v2/storage.hpp
@@ -197,6 +197,45 @@ class Storage final {
         const std::optional<utils::Bound<PropertyValue>> &upper_bound,
         View view);
 
+    /// Return approximate number of all vertices in the database.
+    /// Note that this is always an over-estimate and never an under-estimate.
+    int64_t ApproximateVertexCount() const {
+      return storage_->vertices_.size();
+    }
+
+    /// Return approximate number of vertices with the given label.
+    /// Note that this is always an over-estimate and never an under-estimate.
+    int64_t ApproximateVertexCount(LabelId label) const {
+      return storage_->indices_.label_index.ApproximateVertexCount(label);
+    }
+
+    /// Return approximate number of vertices with the given label and property.
+    /// Note that this is always an over-estimate and never an under-estimate.
+    int64_t ApproximateVertexCount(LabelId label, PropertyId property) const {
+      return storage_->indices_.label_property_index.ApproximateVertexCount(
+          label, property);
+    }
+
+    /// Return approximate number of vertices with the given label and the given
+    /// value for the given property. Note that this is always an over-estimate
+    /// and never an under-estimate.
+    int64_t ApproximateVertexCount(LabelId label, PropertyId property,
+                                   const PropertyValue &value) const {
+      return storage_->indices_.label_property_index.ApproximateVertexCount(
+          label, property, value);
+    }
+
+    /// Return approximate number of vertices with the given label and value for
+    /// the given property in the range defined by provided upper and lower
+    /// bounds.
+    int64_t ApproximateVertexCount(
+        LabelId label, PropertyId property,
+        const std::optional<utils::Bound<PropertyValue>> &lower,
+        const std::optional<utils::Bound<PropertyValue>> &upper) const {
+      return storage_->indices_.label_property_index.ApproximateVertexCount(
+          label, property, lower, upper);
+    }
+
     Result DeleteVertex(VertexAccessor *vertex);
 
     Result DetachDeleteVertex(VertexAccessor *vertex);
diff --git a/tests/unit/storage_v2_indices.cpp b/tests/unit/storage_v2_indices.cpp
index d601d60de..503d55911 100644
--- a/tests/unit/storage_v2_indices.cpp
+++ b/tests/unit/storage_v2_indices.cpp
@@ -206,6 +206,19 @@ TEST_F(IndexTest, LabelIndexTransactionalIsolation) {
               UnorderedElementsAre(0, 1, 2, 3, 4));
 }
 
+// NOLINTNEXTLINE(hicpp-special-member-functions)
+TEST_F(IndexTest, LabelIndexCountEstimate) {
+  auto acc = storage.Access();
+  // i % 3 == 0 for 7 of the 20 values, so label2 gets 7 vertices, label1 13.
+  for (int i = 0; i < 20; ++i) {
+    auto vertex = CreateVertex(&acc);
+    ASSERT_NO_ERROR(vertex.AddLabel(i % 3 ? label1 : label2));
+  }
+
+  EXPECT_EQ(acc.ApproximateVertexCount(label1), 13);
+  EXPECT_EQ(acc.ApproximateVertexCount(label2), 7);
+}
+
 // NOLINTNEXTLINE(hicpp-special-member-functions)
 TEST_F(IndexTest, LabelPropertyIndexCreateAndDrop) {
   EXPECT_TRUE(storage.CreateIndex(label1, prop_id));
@@ -466,3 +479,30 @@ TEST_F(IndexTest, LabelPropertyIndexFiltering) {
                 UnorderedElementsAre(4, 5));
   }
 }
+
+// NOLINTNEXTLINE(hicpp-special-member-functions)
+TEST_F(IndexTest, LabelPropertyIndexCountEstimate) {
+  ASSERT_TRUE(storage.CreateIndex(label1, prop_val));
+
+  auto acc = storage.Access();
+  // Create i vertices with property value i, for i in [1, 10]: 55 in total.
+  for (int i = 1; i <= 10; ++i) {
+    for (int j = 0; j < i; ++j) {
+      auto vertex = CreateVertex(&acc);
+      ASSERT_NO_ERROR(vertex.AddLabel(label1));
+      ASSERT_NO_ERROR(vertex.SetProperty(prop_val, PropertyValue(i)));
+    }
+  }
+
+  EXPECT_EQ(acc.ApproximateVertexCount(label1, prop_val), 55);
+  for (int i = 1; i <= 10; ++i) {
+    EXPECT_EQ(acc.ApproximateVertexCount(label1, prop_val, PropertyValue(i)),
+              i);
+  }
+
+  // Inclusive bounds [2, 6] cover the vertices with values 2 through 6.
+  EXPECT_EQ(acc.ApproximateVertexCount(
+                label1, prop_val, utils::MakeBoundInclusive(PropertyValue(2)),
+                utils::MakeBoundInclusive(PropertyValue(6))),
+            2 + 3 + 4 + 5 + 6);
+}