Implement vertex count estimation

Reviewers: teon.banek, mferencevic

Reviewed By: teon.banek

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D2306
This commit is contained in:
Marin Tomic 2019-08-22 16:49:45 +02:00
parent 97b20a9384
commit 7e741b8d25
4 changed files with 143 additions and 0 deletions

View File

@ -572,6 +572,53 @@ LabelPropertyIndex::Iterable::Iterator LabelPropertyIndex::Iterable::end() {
return Iterator(this, index_accessor_.end());
}
// A helper function for determining the skip list layer used for estimating the
// number of elements in the label property index. The lower the layer we use,
// the better the approximation we get (if we use the lowest layer, we get the
// exact numbers). However, lower skip list layers contain more elements, so we
// must iterate through more items to get the estimate.
//
// Our goal is to achieve balance between execution time and approximation
// precision. The expected number of elements at the k-th skip list layer is N *
// (1/2)^(k-1), where N is the skip-list size. We choose to iterate through no
// more than sqrt(N) items for large N when calculating the estimate, so we need
// to choose the skip-list layer such that N * (1/2)^(k-1) <= sqrt(N). That is
// equivalent to k >= 1 + 1/2 * log2(N), so we choose k to be 1 + ceil(log2(N) /
// 2).
//
// For N small enough (arbitrarily chosen to be 500), we will just use the
// lowest layer to get the exact numbers. Mostly because this makes writing
// tests easier.
namespace {
// Selects the skip list layer to walk when estimating element counts for a
// skip list holding `N` elements. Returns 1 (the lowest layer) for N <= 500;
// otherwise returns 1 + (Log2(N) + 1) / 2, capped at the maximum skip list
// height.
uint64_t SkipListLayerForEstimation(uint64_t N) {
  // Small lists are cheap to walk in full, so use the lowest layer.
  if (N <= 500) {
    return 1;
  }
  const auto layer = 1 + (utils::Log2(N) + 1) / 2;
  return std::min(layer, utils::kSkipListMaxHeight);
}
}  // namespace
// Estimates how many entries of the (label, property) index match `value`.
// The index must already exist; otherwise the CHECK below fails. The skip list
// layer used for the estimate is derived from the current index size.
int64_t LabelPropertyIndex::ApproximateVertexCount(
    LabelId label, PropertyId property, const PropertyValue &value) const {
  const auto found = index_.find({label, property});
  CHECK(found != index_.end())
      << "Index for label " << label.AsUint() << " and property "
      << property.AsUint() << " doesn't exist";
  auto accessor = found->second.access();
  const auto layer = SkipListLayerForEstimation(accessor.size());
  return accessor.estimate_count(value, layer);
}
// Estimates how many entries of the (label, property) index fall between the
// optional `lower` and `upper` bounds. The index must already exist; otherwise
// the CHECK below fails. The skip list layer used for the estimate is derived
// from the current index size.
int64_t LabelPropertyIndex::ApproximateVertexCount(
    LabelId label, PropertyId property,
    const std::optional<utils::Bound<PropertyValue>> &lower,
    const std::optional<utils::Bound<PropertyValue>> &upper) const {
  const auto found = index_.find({label, property});
  CHECK(found != index_.end())
      << "Index for label " << label.AsUint() << " and property "
      << property.AsUint() << " doesn't exist";
  auto accessor = found->second.access();
  const auto layer = SkipListLayerForEstimation(accessor.size());
  return accessor.estimate_range_count(lower, upper, layer);
}
void RemoveObsoleteEntries(Indices *indices,
uint64_t oldest_active_start_timestamp) {
indices->label_index.RemoveObsoleteEntries(oldest_active_start_timestamp);

View File

@ -92,6 +92,10 @@ class LabelIndex {
transaction, indices_);
}
/// Returns the size of the storage obtained for `label`.
/// NOTE(review): going by its name, GetOrCreateStorage may create storage for
/// a label that has none yet, which would make this query mutating -- confirm.
int64_t ApproximateVertexCount(LabelId label) {
  auto label_storage = GetOrCreateStorage(label);
  return label_storage->size();
}
private:
utils::SkipList<LabelStorage> index_;
Indices *indices_;
@ -193,6 +197,22 @@ class LabelPropertyIndex {
upper_bound, view, transaction, indices_);
}
/// Returns the size of the index stored for the (label, property) pair.
/// The index must already exist; otherwise the CHECK below fails.
int64_t ApproximateVertexCount(LabelId label, PropertyId property) const {
  const auto found = index_.find({label, property});
  CHECK(found != index_.end())
      << "Index for label " << label.AsUint() << " and property "
      << property.AsUint() << " doesn't exist";
  const auto &entries = found->second;
  return entries.size();
}
/// Estimates the number of entries in the (label, property) index that match
/// `value`. The index for (label, property) must exist; the definition
/// CHECK-fails when it does not.
int64_t ApproximateVertexCount(LabelId label, PropertyId property,
const PropertyValue &value) const;
/// Estimates the number of entries in the (label, property) index that lie
/// between the optional `lower` and `upper` bounds. The index for
/// (label, property) must exist; the definition CHECK-fails when it does not.
int64_t ApproximateVertexCount(
LabelId label, PropertyId property,
const std::optional<utils::Bound<PropertyValue>> &lower,
const std::optional<utils::Bound<PropertyValue>> &upper) const;
private:
Indices *indices_;
std::map<std::pair<LabelId, PropertyId>, utils::SkipList<Entry>> index_;

View File

@ -197,6 +197,45 @@ class Storage final {
const std::optional<utils::Bound<PropertyValue>> &upper_bound,
View view);
/// Approximate count of all vertices in the database, taken from the size of
/// the vertex container. The value may over-estimate but never
/// under-estimates.
int64_t ApproximateVertexCount() const {
  auto &vertices = storage_->vertices_;
  return vertices.size();
}
/// Approximate count of vertices carrying `label`, as reported by the label
/// index. The value may over-estimate but never under-estimates.
int64_t ApproximateVertexCount(LabelId label) const {
  auto &label_index = storage_->indices_.label_index;
  return label_index.ApproximateVertexCount(label);
}
/// Approximate count of vertices that have `label` and `property`, as reported
/// by the label-property index. The value may over-estimate but never
/// under-estimates.
int64_t ApproximateVertexCount(LabelId label, PropertyId property) const {
  const auto &label_property_index = storage_->indices_.label_property_index;
  return label_property_index.ApproximateVertexCount(label, property);
}
/// Approximate count of vertices that have `label` and whose `property` equals
/// `value`, as reported by the label-property index. The value may
/// over-estimate but never under-estimates.
int64_t ApproximateVertexCount(LabelId label, PropertyId property,
                               const PropertyValue &value) const {
  const auto &label_property_index = storage_->indices_.label_property_index;
  return label_property_index.ApproximateVertexCount(label, property, value);
}
/// Approximate count of vertices that have `label` and whose `property` value
/// lies in the range described by the optional `lower` and `upper` bounds, as
/// reported by the label-property index.
int64_t ApproximateVertexCount(
    LabelId label, PropertyId property,
    const std::optional<utils::Bound<PropertyValue>> &lower,
    const std::optional<utils::Bound<PropertyValue>> &upper) const {
  const auto &label_property_index = storage_->indices_.label_property_index;
  return label_property_index.ApproximateVertexCount(label, property, lower,
                                                     upper);
}
Result<bool> DeleteVertex(VertexAccessor *vertex);
Result<bool> DetachDeleteVertex(VertexAccessor *vertex);

View File

@ -206,6 +206,18 @@ TEST_F(IndexTest, LabelIndexTransactionalIsolation) {
UnorderedElementsAre(0, 1, 2, 3, 4));
}
// Creates 20 vertices split between two labels and checks the per-label
// count estimates.
// NOLINTNEXTLINE(hicpp-special-member-functions)
TEST_F(IndexTest, LabelIndexCountEstimate) {
  auto acc = storage.Access();
  // Indices 0, 3, ..., 18 (7 vertices) get label2; the other 13 get label1.
  for (int idx = 0; idx < 20; ++idx) {
    auto vertex = CreateVertex(&acc);
    if (idx % 3 != 0) {
      ASSERT_NO_ERROR(vertex.AddLabel(label1));
    } else {
      ASSERT_NO_ERROR(vertex.AddLabel(label2));
    }
  }

  EXPECT_EQ(acc.ApproximateVertexCount(label1), 13);
  EXPECT_EQ(acc.ApproximateVertexCount(label2), 7);
}
// NOLINTNEXTLINE(hicpp-special-member-functions)
TEST_F(IndexTest, LabelPropertyIndexCreateAndDrop) {
EXPECT_TRUE(storage.CreateIndex(label1, prop_id));
@ -466,3 +478,28 @@ TEST_F(IndexTest, LabelPropertyIndexFiltering) {
UnorderedElementsAre(4, 5));
}
}
// Checks the label-property count estimates (total, per value, and per range)
// on a small data set where property value i is stored on exactly i vertices.
// NOLINTNEXTLINE(hicpp-special-member-functions)
TEST_F(IndexTest, LabelPropertyIndexCountEstimate) {
  // The index must exist before counting: the label-property count queries
  // CHECK-fail on a missing index, so stop here rather than abort later.
  ASSERT_TRUE(storage.CreateIndex(label1, prop_val));

  auto acc = storage.Access();
  // Value i (1..10) is assigned to i vertices, 1 + 2 + ... + 10 = 55 in total.
  for (int i = 1; i <= 10; ++i) {
    for (int j = 0; j < i; ++j) {
      auto vertex = CreateVertex(&acc);
      ASSERT_NO_ERROR(vertex.AddLabel(label1));
      ASSERT_NO_ERROR(vertex.SetProperty(prop_val, PropertyValue(i)));
    }
  }

  EXPECT_EQ(acc.ApproximateVertexCount(label1, prop_val), 55);
  for (int i = 1; i <= 10; ++i) {
    EXPECT_EQ(acc.ApproximateVertexCount(label1, prop_val, PropertyValue(i)),
              i);
  }

  // Inclusive range [2, 6] covers the vertices holding values 2 through 6.
  EXPECT_EQ(acc.ApproximateVertexCount(
                label1, prop_val, utils::MakeBoundInclusive(PropertyValue(2)),
                utils::MakeBoundInclusive(PropertyValue(6))),
            2 + 3 + 4 + 5 + 6);
}