Improve property store performance

Summary:
This diff improves the performance of `PropertyStore` with two main
techniques:

First:
`PropertyValue` has a very expensive constructor and destructor. The
`PropertyValue` was previously passed as a return value from many functions
wrapped in a `std::optional`. That caused the `PropertyValue`
constructor/destructor to be called for each intermediary value that was passed
between functions. This diff changes the functions to return a `bool` value
that imitates the `std::optional` "emptyness" flag and the `PropertyValue` is
modified using a pointer to it so that its constructor/destructor is called
only once.

Second:
The `PropertyStore` buffer was previously iterated through at least twice.
First to determine the exact position of the encoded property and then to
actually decode the property. This diff combines the two passes into a single
pass so that the property is immediately loaded if it is found.

Reviewers: buda

Reviewed By: buda

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D2766
This commit is contained in:
Matej Ferencevic 2020-05-05 10:29:46 +02:00
parent 317f7f118c
commit 4d2eda398f
3 changed files with 295 additions and 99 deletions

View File

@ -24,7 +24,7 @@ namespace {
// bytes). Also, the `PropertyValue` must have a `type` member that (even though // bytes). Also, the `PropertyValue` must have a `type` member that (even though
// it is small) causes a padding hole to be inserted in the `PropertyValue` that // it is small) causes a padding hole to be inserted in the `PropertyValue` that
// wastes even more memory. Memory is wasted even more when the `PropertyValue` // wastes even more memory. Memory is wasted even more when the `PropertyValue`
// stores a list or a map of `PropertyValues` because each of the internal // stores a list or a map of `PropertyValue`s because each of the internal
// values also wastes memory. // values also wastes memory.
// //
// Even though there is a lot of memory being wasted in `PropertyValue`, all of // Even though there is a lot of memory being wasted in `PropertyValue`, all of
@ -433,102 +433,118 @@ std::optional<std::pair<Type, Size>> EncodePropertyValue(
} }
// Function used to decode a PropertyValue from a byte stream. It can either // Function used to decode a PropertyValue from a byte stream. It can either
// decode or skip the encoded PropertyValue, depending on the supplied template // decode or skip the encoded PropertyValue, depending on the supplied value
// parameter. // pointer.
template <bool read_data> [[nodiscard]] bool DecodePropertyValue(Reader *reader, Type type,
std::optional<PropertyValue> DecodePropertyValue(Reader *reader, Type type, Size payload_size,
Size payload_size) { PropertyValue *value) {
switch (type) { switch (type) {
case Type::EMPTY: case Type::EMPTY: {
return std::nullopt; return false;
case Type::NONE: }
return PropertyValue(); case Type::NONE: {
case Type::BOOL: { if (value) {
if (payload_size == Size::INT64) { *value = PropertyValue();
return PropertyValue(true);
} else {
return PropertyValue(false);
} }
return true;
}
case Type::BOOL: {
if (value) {
if (payload_size == Size::INT64) {
*value = PropertyValue(true);
} else {
*value = PropertyValue(false);
}
}
return true;
} }
case Type::INT: { case Type::INT: {
auto value = reader->ReadInt(payload_size); auto int_v = reader->ReadInt(payload_size);
if (!value) return std::nullopt; if (!int_v) return false;
return PropertyValue(*value); if (value) {
*value = PropertyValue(*int_v);
}
return true;
} }
case Type::DOUBLE: { case Type::DOUBLE: {
auto value = reader->ReadDouble(payload_size); auto double_v = reader->ReadDouble(payload_size);
if (!value) return std::nullopt; if (!double_v) return false;
return PropertyValue(*value); if (value) {
*value = PropertyValue(*double_v);
}
return true;
} }
case Type::STRING: { case Type::STRING: {
auto size = reader->ReadUint(payload_size); auto size = reader->ReadUint(payload_size);
if (!size) return std::nullopt; if (!size) return false;
if constexpr (read_data) { if (value) {
std::string value(*size, '\0'); std::string str_v(*size, '\0');
if (!reader->ReadBytes(value.data(), *size)) return std::nullopt; if (!reader->ReadBytes(str_v.data(), *size)) return false;
return PropertyValue(std::move(value)); *value = PropertyValue(std::move(str_v));
} else { } else {
if (!reader->SkipBytes(*size)) return std::nullopt; if (!reader->SkipBytes(*size)) return false;
return PropertyValue();
} }
return true;
} }
case Type::LIST: { case Type::LIST: {
auto size = reader->ReadUint(payload_size); auto size = reader->ReadUint(payload_size);
if (!size) return std::nullopt; if (!size) return false;
if constexpr (read_data) { if (value) {
std::vector<PropertyValue> list; std::vector<PropertyValue> list;
list.reserve(*size); list.reserve(*size);
for (uint64_t i = 0; i < *size; ++i) { for (uint64_t i = 0; i < *size; ++i) {
auto metadata = reader->ReadMetadata(); auto metadata = reader->ReadMetadata();
if (!metadata) return std::nullopt; if (!metadata) return false;
auto ret = DecodePropertyValue<read_data>(reader, metadata->type, PropertyValue item;
metadata->payload_size); if (!DecodePropertyValue(reader, metadata->type,
if (!ret) return std::nullopt; metadata->payload_size, &item))
list.emplace_back(std::move(*ret)); return false;
list.emplace_back(std::move(item));
} }
return PropertyValue(std::move(list)); *value = PropertyValue(std::move(list));
} else { } else {
for (uint64_t i = 0; i < *size; ++i) { for (uint64_t i = 0; i < *size; ++i) {
auto metadata = reader->ReadMetadata(); auto metadata = reader->ReadMetadata();
if (!metadata) return std::nullopt; if (!metadata) return false;
auto ret = DecodePropertyValue<read_data>(reader, metadata->type, if (!DecodePropertyValue(reader, metadata->type,
metadata->payload_size); metadata->payload_size, nullptr))
if (!ret) return std::nullopt; return false;
} }
return PropertyValue();
} }
return true;
} }
case Type::MAP: { case Type::MAP: {
auto size = reader->ReadUint(payload_size); auto size = reader->ReadUint(payload_size);
if (!size) return std::nullopt; if (!size) return false;
if constexpr (read_data) { if (value) {
std::map<std::string, PropertyValue> map; std::map<std::string, PropertyValue> map;
for (uint64_t i = 0; i < *size; ++i) { for (uint64_t i = 0; i < *size; ++i) {
auto metadata = reader->ReadMetadata(); auto metadata = reader->ReadMetadata();
if (!metadata) return std::nullopt; if (!metadata) return false;
auto key_size = reader->ReadUint(metadata->id_size); auto key_size = reader->ReadUint(metadata->id_size);
if (!key_size) return std::nullopt; if (!key_size) return false;
std::string key(*key_size, '\0'); std::string key(*key_size, '\0');
if (!reader->ReadBytes(key.data(), *key_size)) return std::nullopt; if (!reader->ReadBytes(key.data(), *key_size)) return false;
auto ret = DecodePropertyValue<read_data>(reader, metadata->type, PropertyValue item;
metadata->payload_size); if (!DecodePropertyValue(reader, metadata->type,
if (!ret) return std::nullopt; metadata->payload_size, &item))
map.emplace(std::move(key), std::move(*ret)); return false;
map.emplace(std::move(key), std::move(item));
} }
return PropertyValue(std::move(map)); *value = PropertyValue(std::move(map));
} else { } else {
for (uint64_t i = 0; i < *size; ++i) { for (uint64_t i = 0; i < *size; ++i) {
auto metadata = reader->ReadMetadata(); auto metadata = reader->ReadMetadata();
if (!metadata) return std::nullopt; if (!metadata) return false;
auto key_size = reader->ReadUint(metadata->id_size); auto key_size = reader->ReadUint(metadata->id_size);
if (!key_size) return std::nullopt; if (!key_size) return false;
if (!reader->SkipBytes(*key_size)) return std::nullopt; if (!reader->SkipBytes(*key_size)) return false;
auto ret = DecodePropertyValue<read_data>(reader, metadata->type, if (!DecodePropertyValue(reader, metadata->type,
metadata->payload_size); metadata->payload_size, nullptr))
if (!ret) return std::nullopt; return false;
} }
return PropertyValue();
} }
return true;
} }
} }
} }
@ -551,40 +567,100 @@ bool EncodeProperty(Writer *writer, PropertyId property,
return true; return true;
} }
// Enum used to return status from the `DecodeExpectedProperty` function.
enum class DecodeExpectedPropertyStatus {
MISSING_DATA,
SMALLER,
EQUAL,
GREATER,
};
// Function used to decode a property (PropertyId, PropertyValue) from a byte // Function used to decode a property (PropertyId, PropertyValue) from a byte
// stream. It can either decode or skip the encoded PropertyValue, depending on // stream. It can either decode or skip the encoded PropertyValue, depending on
// the supplied template parameter. // the provided PropertyValue. The `expected_property` provides another hint
template <bool read_data> // whether the property should be decoded or skipped.
std::optional<std::pair<PropertyId, PropertyValue>> DecodeSkipProperty( //
Reader *reader) { // @return MISSING_DATA when there is not enough data in the buffer to decode
// the property
// @return SMALLER when the property that was currently read has a smaller
// property ID than the expected property; the value isn't
// loaded in this case
// @return EQUAL when the property that was currently read has an ID equal to
// the expected property ID; the value is loaded in this case
// @return GREATER when the property that was currenly read has a greater
// property ID than the expected property; the value isn't
// loaded in this case
//
// @sa DecodeAnyProperty
[[nodiscard]] DecodeExpectedPropertyStatus DecodeExpectedProperty(
Reader *reader, PropertyId expected_property, PropertyValue *value) {
auto metadata = reader->ReadMetadata();
if (!metadata) return DecodeExpectedPropertyStatus::MISSING_DATA;
auto property_id = reader->ReadUint(metadata->id_size);
if (!property_id) return DecodeExpectedPropertyStatus::MISSING_DATA;
// Don't load the value if this isn't the expected property.
if (property_id != expected_property.AsUint()) {
value = nullptr;
}
if (!DecodePropertyValue(reader, metadata->type, metadata->payload_size,
value))
return DecodeExpectedPropertyStatus::MISSING_DATA;
if (property_id < expected_property.AsUint()) {
return DecodeExpectedPropertyStatus::SMALLER;
} else if (property_id == expected_property.AsUint()) {
return DecodeExpectedPropertyStatus::EQUAL;
} else {
return DecodeExpectedPropertyStatus::GREATER;
}
}
// Function used to decode a property (PropertyId, PropertyValue) from a byte
// stream. It can either decode or skip the encoded PropertyValue, depending on
// the provided PropertyValue.
//
// @sa DecodeExpectedProperty
[[nodiscard]] std::optional<PropertyId> DecodeAnyProperty(
Reader *reader, PropertyValue *value) {
auto metadata = reader->ReadMetadata(); auto metadata = reader->ReadMetadata();
if (!metadata) return std::nullopt; if (!metadata) return std::nullopt;
auto property = reader->ReadUint(metadata->id_size); auto property_id = reader->ReadUint(metadata->id_size);
if (!property) return std::nullopt; if (!property_id) return std::nullopt;
auto value = DecodePropertyValue<read_data>(reader, metadata->type, if (!DecodePropertyValue(reader, metadata->type, metadata->payload_size,
metadata->payload_size); value))
if (!value) return std::nullopt; return std::nullopt;
return {{PropertyId::FromUint(*property), std::move(*value)}}; return PropertyId::FromUint(*property_id);
} }
// Helper function used to decode a property. // Function used to find and (selectively) get the property value of the
std::optional<std::pair<PropertyId, PropertyValue>> DecodeProperty( // property whose ID is `property`. It relies on the fact that the properties
Reader *reader) { // are sorted (by ID) in the buffer. If the function doesn't find the property,
return DecodeSkipProperty<true>(reader); // the `value` won't be updated.
//
// @sa FindSpecificPropertyAndBufferInfo
[[nodiscard]] DecodeExpectedPropertyStatus FindSpecificProperty(
Reader *reader, PropertyId property, PropertyValue *value) {
while (true) {
auto ret = DecodeExpectedProperty(reader, property, value);
// Because the properties are sorted in the buffer, we only need to
// continue searching for the property while this function returns a
// `SMALLER` value indicating that the ID of the found property is smaller
// than the seeked ID. All other return values (`MISSING_DATA`, `EQUAL` and
// `GREATER`) terminate the search.
if (ret != DecodeExpectedPropertyStatus::SMALLER) {
return ret;
}
}
} }
// Helper function used to skip a property. // Struct used to return info about the property position and buffer size.
std::optional<PropertyId> SkipProperty(Reader *reader) { struct SpecificPropertyAndBufferInfo {
auto ret = DecodeSkipProperty<false>(reader);
if (!ret) return std::nullopt;
return ret->first;
}
// Struct used to return info about the property buffer.
struct PropertyBufferInfo {
uint64_t property_begin; uint64_t property_begin;
uint64_t property_end; uint64_t property_end;
uint64_t property_size; uint64_t property_size;
@ -602,18 +678,22 @@ struct PropertyBufferInfo {
// and `property_begin` will be equal to `property_end`. Positions and size of // and `property_begin` will be equal to `property_end`. Positions and size of
// all properties is always calculated (even if the specific property isn't // all properties is always calculated (even if the specific property isn't
// found). // found).
PropertyBufferInfo FindProperty(Reader *reader, PropertyId property) { //
// @sa FindSpecificProperty
SpecificPropertyAndBufferInfo FindSpecificPropertyAndBufferInfo(
Reader *reader, PropertyId property) {
uint64_t property_begin = reader->GetPosition(); uint64_t property_begin = reader->GetPosition();
uint64_t property_end = reader->GetPosition(); uint64_t property_end = reader->GetPosition();
uint64_t all_begin = reader->GetPosition(); uint64_t all_begin = reader->GetPosition();
uint64_t all_end = reader->GetPosition(); uint64_t all_end = reader->GetPosition();
while (true) { while (true) {
auto ret = SkipProperty(reader); auto ret = DecodeExpectedProperty(reader, property, nullptr);
if (!ret) break; if (ret == DecodeExpectedPropertyStatus::MISSING_DATA) {
if (*ret < property) { break;
} else if (ret == DecodeExpectedPropertyStatus::SMALLER) {
property_begin = reader->GetPosition(); property_begin = reader->GetPosition();
property_end = reader->GetPosition(); property_end = reader->GetPosition();
} else if (*ret == property) { } else if (ret == DecodeExpectedPropertyStatus::EQUAL) {
property_end = reader->GetPosition(); property_end = reader->GetPosition();
} }
all_end = reader->GetPosition(); all_end = reader->GetPosition();
@ -722,13 +802,11 @@ PropertyValue PropertyStore::GetProperty(PropertyId property) const {
data = &buffer_[1]; data = &buffer_[1];
} }
Reader reader(data, size); Reader reader(data, size);
auto info = FindProperty(&reader, property); PropertyValue value;
if (info.property_size == 0) return PropertyValue(); if (FindSpecificProperty(&reader, property, &value) !=
Reader prop_reader(data + info.property_begin, info.property_size); DecodeExpectedPropertyStatus::EQUAL)
auto prop = DecodeProperty(&prop_reader); return PropertyValue();
CHECK(prop) << "Invalid database state!"; return value;
CHECK(prop->first == property) << "Invalid database state!";
return std::move(prop->second);
} }
bool PropertyStore::HasProperty(PropertyId property) const { bool PropertyStore::HasProperty(PropertyId property) const {
@ -741,8 +819,8 @@ bool PropertyStore::HasProperty(PropertyId property) const {
data = &buffer_[1]; data = &buffer_[1];
} }
Reader reader(data, size); Reader reader(data, size);
auto info = FindProperty(&reader, property); return FindSpecificProperty(&reader, property, nullptr) ==
return info.property_size != 0; DecodeExpectedPropertyStatus::EQUAL;
} }
std::map<PropertyId, PropertyValue> PropertyStore::Properties() const { std::map<PropertyId, PropertyValue> PropertyStore::Properties() const {
@ -757,9 +835,10 @@ std::map<PropertyId, PropertyValue> PropertyStore::Properties() const {
Reader reader(data, size); Reader reader(data, size);
std::map<PropertyId, PropertyValue> props; std::map<PropertyId, PropertyValue> props;
while (true) { while (true) {
auto ret = DecodeProperty(&reader); PropertyValue value;
if (!ret) break; auto prop = DecodeAnyProperty(&reader, &value);
props.emplace(ret->first, std::move(ret->second)); if (!prop) break;
props.emplace(*prop, std::move(value));
} }
return props; return props;
} }
@ -823,7 +902,7 @@ bool PropertyStore::SetProperty(PropertyId property,
} }
} else { } else {
Reader reader(data, size); Reader reader(data, size);
auto info = FindProperty(&reader, property); auto info = FindSpecificPropertyAndBufferInfo(&reader, property);
existed = info.property_size != 0; existed = info.property_size != 0;
auto new_size = info.all_size - info.property_size + property_size; auto new_size = info.all_size - info.property_size + property_size;
auto new_size_to_power_of_8 = ToPowerOf8(new_size); auto new_size_to_power_of_8 = ToPowerOf8(new_size);

View File

@ -57,3 +57,6 @@ target_link_libraries(${test_prefix}expansion mg-query mg-communication)
add_benchmark(storage_v2_gc.cpp) add_benchmark(storage_v2_gc.cpp)
target_link_libraries(${test_prefix}storage_v2_gc mg-storage-v2) target_link_libraries(${test_prefix}storage_v2_gc mg-storage-v2)
add_benchmark(storage_v2_property_store.cpp)
target_link_libraries(${test_prefix}storage_v2_property_store mg-storage-v2)

View File

@ -0,0 +1,114 @@
#include <chrono>
#include <iostream>
#include <map>
#include <random>
#include <benchmark/benchmark.h>
#include "storage/v2/property_store.hpp"
///////////////////////////////////////////////////////////////////////////////
// PropertyStore Set
///////////////////////////////////////////////////////////////////////////////
// NOLINTNEXTLINE(google-runtime-references)
static void PropertyStoreSet(benchmark::State &state) {
storage::PropertyStore store;
std::mt19937 gen(state.thread_index);
std::uniform_int_distribution<uint64_t> dist(0, state.range(0) - 1);
uint64_t counter = 0;
while (state.KeepRunning()) {
auto prop = storage::PropertyId::FromUint(dist(gen));
store.SetProperty(prop, storage::PropertyValue(42));
++counter;
}
state.SetItemsProcessed(counter);
}
BENCHMARK(PropertyStoreSet)
->RangeMultiplier(2)
->Range(1, 1024)
->Unit(benchmark::kNanosecond)
->UseRealTime();
///////////////////////////////////////////////////////////////////////////////
// std::map Set
///////////////////////////////////////////////////////////////////////////////
// NOLINTNEXTLINE(google-runtime-references)
static void StdMapSet(benchmark::State &state) {
std::map<storage::PropertyId, storage::PropertyValue> store;
std::mt19937 gen(state.thread_index);
std::uniform_int_distribution<uint64_t> dist(0, state.range(0) - 1);
uint64_t counter = 0;
while (state.KeepRunning()) {
auto prop = storage::PropertyId::FromUint(dist(gen));
store.emplace(prop, storage::PropertyValue(42));
++counter;
}
state.SetItemsProcessed(counter);
}
BENCHMARK(StdMapSet)
->RangeMultiplier(2)
->Range(1, 1024)
->Unit(benchmark::kNanosecond)
->UseRealTime();
///////////////////////////////////////////////////////////////////////////////
// PropertyStore Get
///////////////////////////////////////////////////////////////////////////////
// NOLINTNEXTLINE(google-runtime-references)
static void PropertyStoreGet(benchmark::State &state) {
storage::PropertyStore store;
for (uint64_t i = 0; i < state.range(0); ++i) {
auto prop = storage::PropertyId::FromUint(i);
store.SetProperty(prop, storage::PropertyValue(0));
}
std::mt19937 gen(state.thread_index);
std::uniform_int_distribution<uint64_t> dist(0, state.range(0) - 1);
uint64_t counter = 0;
while (state.KeepRunning()) {
auto prop = storage::PropertyId::FromUint(dist(gen));
store.GetProperty(prop);
++counter;
}
state.SetItemsProcessed(counter);
}
BENCHMARK(PropertyStoreGet)
->RangeMultiplier(2)
->Range(1, 1024)
->Unit(benchmark::kNanosecond)
->UseRealTime();
///////////////////////////////////////////////////////////////////////////////
// std::map Get
///////////////////////////////////////////////////////////////////////////////
// NOLINTNEXTLINE(google-runtime-references)
static void StdMapGet(benchmark::State &state) {
std::map<storage::PropertyId, storage::PropertyValue> store;
for (uint64_t i = 0; i < state.range(0); ++i) {
auto prop = storage::PropertyId::FromUint(i);
store.emplace(prop, storage::PropertyValue(0));
}
std::mt19937 gen(state.thread_index);
std::uniform_int_distribution<uint64_t> dist(0, state.range(0) - 1);
uint64_t counter = 0;
while (state.KeepRunning()) {
auto prop = storage::PropertyId::FromUint(dist(gen));
store.find(prop);
++counter;
}
state.SetItemsProcessed(counter);
}
BENCHMARK(StdMapGet)
->RangeMultiplier(2)
->Range(1, 1024)
->Unit(benchmark::kNanosecond)
->UseRealTime();
BENCHMARK_MAIN();