Implement vertex count estimation
Reviewers: teon.banek, mferencevic Reviewed By: teon.banek Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D2306
This commit is contained in:
parent
97b20a9384
commit
7e741b8d25
@ -572,6 +572,53 @@ LabelPropertyIndex::Iterable::Iterator LabelPropertyIndex::Iterable::end() {
|
||||
return Iterator(this, index_accessor_.end());
|
||||
}
|
||||
|
||||
// A helper function for determining the skip list layer used for estimating the
// number of elements in the label property index. The lower the layer we use,
// the better the approximation we get (if we use the lowest layer, we get the
// exact numbers). However, lower skip list layers contain more elements so we
// must iterate through more items to get the estimate.
|
||||
//
|
||||
// Our goal is to achieve balance between execution time and approximation
|
||||
// precision. The expected number of elements at the k-th skip list layer is N *
|
||||
// (1/2)^(k-1), where N is the skip-list size. We choose to iterate through no
|
||||
// more than sqrt(N) items for large N when calculating the estimate, so we need
|
||||
// to choose the skip-list layer such that N * (1/2)^(k-1) <= sqrt(N). That is
|
||||
// equivalent to k >= 1 + 1/2 * log2(N), so we choose k to be 1 + ceil(log2(N) /
|
||||
// 2).
|
||||
//
|
||||
// For N small enough (arbitrarily chosen to be 500), we will just use the
|
||||
// lowest layer to get the exact numbers. Mostly because this makes writing
|
||||
// tests easier.
|
||||
namespace {
|
||||
uint64_t SkipListLayerForEstimation(uint64_t N) {
|
||||
if (N <= 500) return 1;
|
||||
return std::min(1 + (utils::Log2(N) + 1) / 2, utils::kSkipListMaxHeight);
|
||||
}
|
||||
} // namespace
|
||||
|
||||
// Estimates how many entries of the (label, property) index have the property
// equal to `value`. The estimate is computed on the skip list layer selected by
// SkipListLayerForEstimation for the current index size.
//
// The index for (label, property) must exist; otherwise the CHECK below fails.
int64_t LabelPropertyIndex::ApproximateVertexCount(
    LabelId label, PropertyId property, const PropertyValue &value) const {
  const auto index_it = index_.find({label, property});
  CHECK(index_it != index_.end())
      << "Index for label " << label.AsUint() << " and property "
      << property.AsUint() << " doesn't exist";

  auto accessor = index_it->second.access();
  const auto layer = SkipListLayerForEstimation(accessor.size());
  return accessor.estimate_count(value, layer);
}
|
||||
|
||||
// Estimates how many entries of the (label, property) index have a property
// value inside the range described by the optional `lower` and `upper` bounds.
// The estimate is computed on the skip list layer selected by
// SkipListLayerForEstimation for the current index size.
//
// The index for (label, property) must exist; otherwise the CHECK below fails.
int64_t LabelPropertyIndex::ApproximateVertexCount(
    LabelId label, PropertyId property,
    const std::optional<utils::Bound<PropertyValue>> &lower,
    const std::optional<utils::Bound<PropertyValue>> &upper) const {
  const auto index_it = index_.find({label, property});
  CHECK(index_it != index_.end())
      << "Index for label " << label.AsUint() << " and property "
      << property.AsUint() << " doesn't exist";

  auto accessor = index_it->second.access();
  const auto layer = SkipListLayerForEstimation(accessor.size());
  return accessor.estimate_range_count(lower, upper, layer);
}
|
||||
|
||||
void RemoveObsoleteEntries(Indices *indices,
|
||||
uint64_t oldest_active_start_timestamp) {
|
||||
indices->label_index.RemoveObsoleteEntries(oldest_active_start_timestamp);
|
||||
|
@ -92,6 +92,10 @@ class LabelIndex {
|
||||
transaction, indices_);
|
||||
}
|
||||
|
||||
/// Returns the size of the storage kept for `label`, which serves as the
/// approximate number of vertices carrying that label.
/// NOTE(review): the storage is obtained through GetOrCreateStorage, which
/// presumably creates it when it is missing -- confirm.
int64_t ApproximateVertexCount(LabelId label) {
  auto &&label_storage = GetOrCreateStorage(label);
  return label_storage->size();
}
|
||||
|
||||
private:
|
||||
utils::SkipList<LabelStorage> index_;
|
||||
Indices *indices_;
|
||||
@ -193,6 +197,22 @@ class LabelPropertyIndex {
|
||||
upper_bound, view, transaction, indices_);
|
||||
}
|
||||
|
||||
/// Returns the size of the index kept for the (label, property) pair, which
/// serves as the approximate number of vertices in that index.
///
/// The index must exist; otherwise the CHECK below fails.
int64_t ApproximateVertexCount(LabelId label, PropertyId property) const {
  const auto index_it = index_.find({label, property});
  CHECK(index_it != index_.end())
      << "Index for label " << label.AsUint() << " and property "
      << property.AsUint() << " doesn't exist";

  const auto &entries = index_it->second;
  return entries.size();
}
|
||||
|
||||
int64_t ApproximateVertexCount(LabelId label, PropertyId property,
|
||||
const PropertyValue &value) const;
|
||||
|
||||
int64_t ApproximateVertexCount(
|
||||
LabelId label, PropertyId property,
|
||||
const std::optional<utils::Bound<PropertyValue>> &lower,
|
||||
const std::optional<utils::Bound<PropertyValue>> &upper) const;
|
||||
|
||||
private:
|
||||
Indices *indices_;
|
||||
std::map<std::pair<LabelId, PropertyId>, utils::SkipList<Entry>> index_;
|
||||
|
@ -197,6 +197,45 @@ class Storage final {
|
||||
const std::optional<utils::Bound<PropertyValue>> &upper_bound,
|
||||
View view);
|
||||
|
||||
/// Return approximate number of all vertices in the database.
|
||||
/// Note that this is always an over-estimate and never an under-estimate.
|
||||
int64_t ApproximateVertexCount() const {
|
||||
return storage_->vertices_.size();
|
||||
}
|
||||
|
||||
/// Return approximate number of vertices with the given label.
|
||||
/// Note that this is always an over-estimate and never an under-estimate.
|
||||
int64_t ApproximateVertexCount(LabelId label) const {
|
||||
return storage_->indices_.label_index.ApproximateVertexCount(label);
|
||||
}
|
||||
|
||||
/// Return approximate number of vertices with the given label and property.
|
||||
/// Note that this is always an over-estimate and never an under-estimate.
|
||||
int64_t ApproximateVertexCount(LabelId label, PropertyId property) const {
|
||||
return storage_->indices_.label_property_index.ApproximateVertexCount(
|
||||
label, property);
|
||||
}
|
||||
|
||||
/// Return approximate number of vertices with the given label and the given
|
||||
/// value for the given property. Note that this is always an over-estimate
|
||||
/// and never an under-estimate.
|
||||
int64_t ApproximateVertexCount(LabelId label, PropertyId property,
|
||||
const PropertyValue &value) const {
|
||||
return storage_->indices_.label_property_index.ApproximateVertexCount(
|
||||
label, property, value);
|
||||
}
|
||||
|
||||
/// Return approximate number of vertices with the given label and value for
|
||||
/// the given property in the range defined by provided upper and lower
|
||||
/// bounds.
|
||||
int64_t ApproximateVertexCount(
|
||||
LabelId label, PropertyId property,
|
||||
const std::optional<utils::Bound<PropertyValue>> &lower,
|
||||
const std::optional<utils::Bound<PropertyValue>> &upper) const {
|
||||
return storage_->indices_.label_property_index.ApproximateVertexCount(
|
||||
label, property, lower, upper);
|
||||
}
|
||||
|
||||
Result<bool> DeleteVertex(VertexAccessor *vertex);
|
||||
|
||||
Result<bool> DetachDeleteVertex(VertexAccessor *vertex);
|
||||
|
@ -206,6 +206,18 @@ TEST_F(IndexTest, LabelIndexTransactionalIsolation) {
|
||||
UnorderedElementsAre(0, 1, 2, 3, 4));
|
||||
}
|
||||
|
||||
// NOLINTNEXTLINE(hicpp-special-member-functions)
|
||||
TEST_F(IndexTest, LabelIndexCountEstimate) {
|
||||
auto acc = storage.Access();
|
||||
for (int i = 0; i < 20; ++i) {
|
||||
auto vertex = CreateVertex(&acc);
|
||||
ASSERT_NO_ERROR(vertex.AddLabel(i % 3 ? label1 : label2));
|
||||
}
|
||||
|
||||
EXPECT_EQ(acc.ApproximateVertexCount(label1), 13);
|
||||
EXPECT_EQ(acc.ApproximateVertexCount(label2), 7);
|
||||
}
|
||||
|
||||
// NOLINTNEXTLINE(hicpp-special-member-functions)
|
||||
TEST_F(IndexTest, LabelPropertyIndexCreateAndDrop) {
|
||||
EXPECT_TRUE(storage.CreateIndex(label1, prop_id));
|
||||
@ -466,3 +478,28 @@ TEST_F(IndexTest, LabelPropertyIndexFiltering) {
|
||||
UnorderedElementsAre(4, 5));
|
||||
}
|
||||
}
|
||||
|
||||
// NOLINTNEXTLINE(hicpp-special-member-functions)
|
||||
TEST_F(IndexTest, LabelPropertyIndexCountEstimate) {
|
||||
storage.CreateIndex(label1, prop_val);
|
||||
|
||||
auto acc = storage.Access();
|
||||
for (int i = 1; i <= 10; ++i) {
|
||||
for (int j = 0; j < i; ++j) {
|
||||
auto vertex = CreateVertex(&acc);
|
||||
ASSERT_NO_ERROR(vertex.AddLabel(label1));
|
||||
ASSERT_NO_ERROR(vertex.SetProperty(prop_val, PropertyValue(i)));
|
||||
}
|
||||
}
|
||||
|
||||
EXPECT_EQ(acc.ApproximateVertexCount(label1, prop_val), 55);
|
||||
for (int i = 1; i <= 10; ++i) {
|
||||
EXPECT_EQ(acc.ApproximateVertexCount(label1, prop_val, PropertyValue(i)),
|
||||
i);
|
||||
}
|
||||
|
||||
EXPECT_EQ(acc.ApproximateVertexCount(
|
||||
label1, prop_val, utils::MakeBoundInclusive(PropertyValue(2)),
|
||||
utils::MakeBoundInclusive(PropertyValue(6))),
|
||||
2 + 3 + 4 + 5 + 6);
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user