Optimize splitting keys inside the on-disk storage (#1155)

This commit is contained in:
Aidar Samerkhanov 2023-08-17 19:09:21 +03:00 committed by GitHub
parent 8f3f693f20
commit 3bf2cf65ab
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 248 additions and 39 deletions

View File

@ -141,7 +141,7 @@ declare -A secondary_urls=(
["rocksdb"]="https://github.com/facebook/rocksdb.git"
["mgclient"]="https://github.com/memgraph/mgclient.git"
["pymgclient"]="https://github.com/memgraph/pymgclient.git"
["mgconsole"]="http://github.com/memgraph/mgconsole.git"
["mgconsole"]="https://github.com/memgraph/mgconsole.git"
["spdlog"]="https://github.com/gabime/spdlog"
["nlohmann"]="https://raw.githubusercontent.com/nlohmann/json/4f8fba14066156b73f1189a2b8bd568bde5284c5/single_include/nlohmann/json.hpp"
["neo4j"]="https://dist.neo4j.org/neo4j-community-5.6.0-unix.tar.gz"

View File

@ -11,6 +11,7 @@
#pragma once
#include <charconv>
#include <cstdint>
#include <iomanip>
#include <iterator>
@ -30,6 +31,47 @@ namespace memgraph::utils {
static constexpr const char *outEdgeDirection = "0";
static constexpr const char *inEdgeDirection = "1";
// Internal helpers that slice delimiter-separated keys/values without allocating.
//
// NOTE: these are deliberately NOT wrapped in an unnamed namespace. This file is a header
// (`#pragma once`) and the helpers are called from `inline` functions with external linkage;
// entities in an unnamed namespace are distinct in every translation unit, so such callers would
// end up with definitions that differ between translation units. The helpers are `inline` (or
// templates), which is already enough to define them safely in a header.

/// Offsets delimiting one part of a string.
/// `start` is the offset of the part's first character; `end` is the offset of the delimiter that
/// terminates the part, or `std::string::npos` when the part runs to the end of the string.
struct StartEndPositions {
  size_t start;
  size_t end;

  /// Length of the part. When `end` is `npos` the value is larger than the remaining string,
  /// which `std::string_view::substr` clamps to "until the end".
  size_t Size() const { return end - start; }

  /// True when `start` is a real offset (not `npos`) that does not lie past `end`.
  bool Valid() const { return start != std::string::npos && start <= end; }
};

/// Returns a view of the `partNumber`-th (1-based) part of `str` when split on `delim`.
///
/// The result points into the buffer `str` refers to, so it must not outlive that buffer.
///
/// @param str        text to slice
/// @param delim      single-character delimiter
/// @param partNumber 1-based index of the wanted part; values <= 0 yield an empty view
/// @return the requested part; if `str` contains fewer than `partNumber` parts, the whole of
///         `str` is returned unchanged
template <typename T>
inline std::string_view FindPartOfStringView(const std::string_view str, const char delim, T partNumber) {
  StartEndPositions startEndPos{0, 0};
  // The counter has the same type as `partNumber`, so the comparisons below never mix
  // signed and unsigned operands and cannot overflow before reaching `partNumber`.
  for (T i = 0; i < partNumber; ++i) {
    startEndPos.start = startEndPos.end;
    startEndPos.end = str.find(delim, startEndPos.start);
    // Only parts *before* the requested one must be terminated by a delimiter.
    if (i + 1 < partNumber) {
      if (startEndPos.end == std::string::npos) {
        // We didn't find enough parts.
        startEndPos.start = std::string::npos;
        break;
      }
      // Step over the delimiter so the next part starts right after it.
      ++startEndPos.end;
    }
  }
  return startEndPos.Valid() ? str.substr(startEndPos.start, startEndPos.Size()) : str;
}

/// View of everything before the first `delimiter` in `src` (all of `src` if there is none).
inline std::string_view GetViewOfFirstPartOfSplit(const std::string_view src, const char delimiter) {
  return FindPartOfStringView(src, delimiter, 1);
}

/// View of the text between the first and second `delimiter` in `src`
/// (all of `src` if it has fewer than two parts).
inline std::string_view GetViewOfSecondPartOfSplit(const std::string_view src, const char delimiter) {
  return FindPartOfStringView(src, delimiter, 2);
}

/// View of the text between the second and third `delimiter` in `src`
/// (all of `src` if it has fewer than three parts).
inline std::string_view GetViewOfThirdPartOfSplit(const std::string_view src, const char delimiter) {
  return FindPartOfStringView(src, delimiter, 3);
}
/// TODO: try to move this to hpp files so that we can follow jump on readings
/// Converts an id-like object to text: the result of `id.AsUint()` rendered with `std::to_string`.
inline std::string SerializeIdType(const auto &id) {
  const auto rawId = id.AsUint();
  return std::to_string(rawId);
}
@ -111,13 +153,11 @@ inline std::string SerializeVertexAsValueForAuxiliaryStorages(storage::LabelId l
}
inline std::string ExtractGidFromKey(const std::string &key) {
std::vector<std::string> key_vector = utils::Split(key, "|");
return key_vector[1];
return std::string(GetViewOfSecondPartOfSplit(key, '|'));
}
inline storage::PropertyStore DeserializePropertiesFromAuxiliaryStorages(const std::string &value) {
std::vector<std::string> value_vector = utils::Split(value, "|");
std::string properties_str = value_vector[1];
const std::string_view properties_str = GetViewOfSecondPartOfSplit(value, '|');
return storage::PropertyStore::CreateFromBuffer(properties_str);
}
@ -136,9 +176,7 @@ inline std::vector<storage::LabelId> DeserializeLabelsFromMainDiskStorage(const
}
inline std::vector<std::string> ExtractLabelsFromMainDiskStorage(const std::string &key) {
std::vector<std::string> key_vector = utils::Split(key, "|");
std::string labels_str = key_vector[0];
return utils::Split(labels_str, ",");
return utils::Split(GetViewOfFirstPartOfSplit(key, '|'), ",");
}
inline storage::PropertyStore DeserializePropertiesFromMainDiskStorage(const std::string_view value) {
@ -167,10 +205,16 @@ inline std::string SerializeVertexAsValueForUniqueConstraint(const storage::Labe
}
inline storage::LabelId DeserializeConstraintLabelFromUniqueConstraintStorage(const std::string &key) {
std::vector<std::string> key_vector = utils::Split(key, "|");
std::vector<std::string> constraint_key = utils::Split(key_vector[0], ",");
const std::string_view firstPartKey = GetViewOfFirstPartOfSplit(key, '|');
const std::string_view constraint_key = GetViewOfFirstPartOfSplit(firstPartKey, ',');
/// TODO: andi Change this to deserialization method directly into the LabelId class
return storage::LabelId::FromUint(std::stoull(constraint_key[0]));
uint64_t labelID = 0;
const char *endOfConstraintKey = constraint_key.data() + constraint_key.size();
auto [ptr, ec] = std::from_chars(constraint_key.data(), endOfConstraintKey, labelID);
if (ec != std::errc() || ptr != endOfConstraintKey) {
throw std::invalid_argument("Failed to deserialize label id from unique constraint storage");
}
return storage::LabelId::FromUint(labelID);
}
inline storage::PropertyStore DeserializePropertiesFromUniqueConstraintStorage(const std::string &value) {
@ -225,8 +269,7 @@ inline std::string SerializeVertexAsValueForLabelPropertyIndex(storage::LabelId
}
inline std::string ExtractGidFromLabelPropertyIndexStorage(const std::string &key) {
std::vector<std::string> key_vector = utils::Split(key, "|");
return key_vector[2];
return std::string(GetViewOfThirdPartOfSplit(key, '|'));
}
inline std::vector<storage::LabelId> DeserializeLabelsFromLabelPropertyIndexStorage(const std::string &value) {

View File

@ -1,4 +1,4 @@
// Copyright 2022 Memgraph Ltd.
// Copyright 2023 Memgraph Ltd.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source

View File

@ -30,6 +30,9 @@
#include "storage/v2/view.hpp"
#include "utils/rocksdb_serialization.hpp"
// NOLINTNEXTLINE(google-build-using-namespace)
using namespace memgraph::storage;
/* Tests that serialization of vertices and edges to RocksDB works correctly.
*/
class RocksDBStorageTest : public ::testing::TestWithParam<bool> {
@ -38,7 +41,7 @@ class RocksDBStorageTest : public ::testing::TestWithParam<bool> {
RocksDBStorageTest() {
config_ = disk_test_utils::GenerateOnDiskConfig(testSuite);
storage = std::make_unique<memgraph::storage::DiskStorage>(config_);
storage = std::make_unique<DiskStorage>(config_);
}
void TearDown() override {
@ -49,8 +52,8 @@ class RocksDBStorageTest : public ::testing::TestWithParam<bool> {
~RocksDBStorageTest() override {}
protected:
std::unique_ptr<memgraph::storage::Storage> storage;
memgraph::storage::Config config_;
std::unique_ptr<Storage> storage;
Config config_;
};
TEST_F(RocksDBStorageTest, SerializeVertexGID) {
@ -74,22 +77,21 @@ TEST_F(RocksDBStorageTest, SerializeVertexGIDLabels) {
}
TEST_F(RocksDBStorageTest, SerializePropertiesLocalBuffer) {
memgraph::storage::PropertyStore props;
auto id = memgraph::storage::PropertyId::FromInt(0);
auto id_value = memgraph::storage::PropertyValue(1);
auto completion_percentage = memgraph::storage::PropertyId::FromInt(1);
auto completion_percentage_value = memgraph::storage::PropertyValue(14);
auto gender = memgraph::storage::PropertyId::FromInt(2);
auto gender_value = memgraph::storage::PropertyValue("man");
auto age = memgraph::storage::PropertyId::FromInt(3);
auto age_value = memgraph::storage::PropertyValue(26);
PropertyStore props;
auto id = PropertyId::FromInt(0);
auto id_value = PropertyValue(1);
auto completion_percentage = PropertyId::FromInt(1);
auto completion_percentage_value = PropertyValue(14);
auto gender = PropertyId::FromInt(2);
auto gender_value = PropertyValue("man");
auto age = PropertyId::FromInt(3);
auto age_value = PropertyValue(26);
ASSERT_TRUE(props.SetProperty(id, id_value));
ASSERT_TRUE(props.SetProperty(age, age_value));
ASSERT_TRUE(props.SetProperty(completion_percentage, completion_percentage_value));
ASSERT_TRUE(props.SetProperty(gender, gender_value));
std::string serialized_props = memgraph::utils::SerializeProperties(props);
memgraph::storage::PropertyStore deserialized_props =
memgraph::storage::PropertyStore::CreateFromBuffer(serialized_props);
PropertyStore deserialized_props = PropertyStore::CreateFromBuffer(serialized_props);
for (const auto &[prop_id, prop_value] : props.Properties()) {
ASSERT_TRUE(deserialized_props.IsPropertyEqual(prop_id, prop_value));
@ -97,25 +99,189 @@ TEST_F(RocksDBStorageTest, SerializePropertiesLocalBuffer) {
}
TEST_F(RocksDBStorageTest, SerializePropertiesExternalBuffer) {
memgraph::storage::PropertyStore props;
auto id = memgraph::storage::PropertyId::FromInt(0);
auto id_value = memgraph::storage::PropertyValue(1);
auto completion_percentage = memgraph::storage::PropertyId::FromInt(1);
auto completion_percentage_value = memgraph::storage::PropertyValue(14);
auto gender = memgraph::storage::PropertyId::FromInt(2);
PropertyStore props;
auto id = PropertyId::FromInt(0);
auto id_value = PropertyValue(1);
auto completion_percentage = PropertyId::FromInt(1);
auto completion_percentage_value = PropertyValue(14);
auto gender = PropertyId::FromInt(2);
// Use big value so that memory for properties is allocated on the heap not on the stack
auto gender_value = memgraph::storage::PropertyValue("man167863816386826");
auto age = memgraph::storage::PropertyId::FromInt(3);
auto age_value = memgraph::storage::PropertyValue(26);
auto gender_value = PropertyValue("man167863816386826");
auto age = PropertyId::FromInt(3);
auto age_value = PropertyValue(26);
ASSERT_TRUE(props.SetProperty(id, id_value));
ASSERT_TRUE(props.SetProperty(age, age_value));
ASSERT_TRUE(props.SetProperty(completion_percentage, completion_percentage_value));
ASSERT_TRUE(props.SetProperty(gender, gender_value));
std::string serialized_props = memgraph::utils::SerializeProperties(props);
memgraph::storage::PropertyStore deserialized_props =
memgraph::storage::PropertyStore::CreateFromBuffer(serialized_props);
PropertyStore deserialized_props = PropertyStore::CreateFromBuffer(serialized_props);
for (const auto &[prop_id, prop_value] : props.Properties()) {
ASSERT_TRUE(deserialized_props.IsPropertyEqual(prop_id, prop_value));
}
}
// ExtractGidFromKey must return "1" for a serialized vertex with gid 1 to which no labels were added.
TEST(RocksDbSerDeSuite, ExtractVertexGidFromVertexKeyNoLabels) {
  const auto vertexGid = Gid::FromInt(1);
  Vertex unlabeledVertex(vertexGid, nullptr);

  std::string key = memgraph::utils::SerializeVertex(unlabeledVertex);

  ASSERT_EQ(memgraph::utils::ExtractGidFromKey(key), "1");
}
// ExtractGidFromKey must return "1" for a serialized vertex with gid 1 carrying a single label (id 2).
TEST(RocksDbSerDeSuite, ExtractVertexGidFromVertexKeyWithOneLabel) {
  const auto vertexGid = Gid::FromInt(1);
  Vertex labeledVertex(vertexGid, nullptr);
  labeledVertex.labels.push_back(LabelId::FromInt(2));

  std::string key = memgraph::utils::SerializeVertex(labeledVertex);

  ASSERT_EQ(memgraph::utils::ExtractGidFromKey(key), "1");
}
// ExtractGidFromKey must return "1" for a serialized vertex with gid 1 carrying labels 2, 3 and 4.
TEST(RocksDbSerDeSuite, ExtractVertexGidFromVertexKeyWithMultipleLabels) {
  const auto vertexGid = Gid::FromInt(1);
  Vertex labeledVertex(vertexGid, nullptr);
  for (const unsigned rawLabel : {2U, 3U, 4U}) {
    labeledVertex.labels.push_back(LabelId::FromInt(rawLabel));
  }

  std::string key = memgraph::utils::SerializeVertex(labeledVertex);

  ASSERT_EQ(memgraph::utils::ExtractGidFromKey(key), "1");
}
// ExtractLabelsFromMainDiskStorage must return {"2"} for a serialized vertex whose only label has id 2.
TEST(RocksDbSerDeSuite, ExtractLabelsFromMainDiskStorageWhenOnlyOneLabel) {
  const auto vertexGid = Gid::FromInt(1);
  Vertex labeledVertex(vertexGid, nullptr);
  labeledVertex.labels.push_back(LabelId::FromInt(2U));

  std::string key = memgraph::utils::SerializeVertex(labeledVertex);

  std::vector<std::string> wantedLabels = {"2"};
  ASSERT_EQ(memgraph::utils::ExtractLabelsFromMainDiskStorage(key), wantedLabels);
}
// ExtractLabelsFromMainDiskStorage must return {"2", "3", "4"}, in that order, for a serialized
// vertex to which labels 2, 3 and 4 were added in that order.
TEST(RocksDbSerDeSuite, ExtractLabelsFromMainDiskStorageWhenMultipleLabels) {
  const auto vertexGid = Gid::FromInt(1);
  Vertex labeledVertex(vertexGid, nullptr);
  for (const unsigned rawLabel : {2U, 3U, 4U}) {
    labeledVertex.labels.push_back(LabelId::FromInt(rawLabel));
  }

  std::string key = memgraph::utils::SerializeVertex(labeledVertex);

  std::vector<std::string> wantedLabels = {"2", "3", "4"};
  ASSERT_EQ(memgraph::utils::ExtractLabelsFromMainDiskStorage(key), wantedLabels);
}
// ExtractGidFromMainDiskStorage must return "1" for a serialized vertex with gid 1 to which no labels were added.
TEST(RocksDbSerDeSuite, ExtractVertexGidFromMainDiskStorageNoLabels) {
  const auto vertexGid = Gid::FromInt(1);
  Vertex unlabeledVertex(vertexGid, nullptr);

  std::string key = memgraph::utils::SerializeVertex(unlabeledVertex);

  ASSERT_EQ(memgraph::utils::ExtractGidFromMainDiskStorage(key), "1");
}
// ExtractGidFromMainDiskStorage must return "1" for a serialized vertex with gid 1 carrying a single label (id 2).
TEST(RocksDbSerDeSuite, ExtractVertexGidFromMainDiskStorageWithOneLabel) {
  const auto vertexGid = Gid::FromInt(1);
  Vertex labeledVertex(vertexGid, nullptr);
  labeledVertex.labels.push_back(LabelId::FromInt(2));

  std::string key = memgraph::utils::SerializeVertex(labeledVertex);

  ASSERT_EQ(memgraph::utils::ExtractGidFromMainDiskStorage(key), "1");
}
// ExtractGidFromMainDiskStorage must return "1" for a serialized vertex with gid 1 carrying labels 2, 3 and 4.
TEST(RocksDbSerDeSuite, ExtractVertexGidFromMainDiskStorageWithMultipleLabels) {
  auto gid = Gid::FromInt(1);
  Vertex vertex(gid, nullptr);
  vertex.labels.push_back(LabelId::FromInt(2));
  vertex.labels.push_back(LabelId::FromInt(3));
  vertex.labels.push_back(LabelId::FromInt(4));
  std::string serializedVertex = memgraph::utils::SerializeVertex(vertex);
  // Exercise the main-disk-storage extractor named by this test (as its NoLabels/WithOneLabel
  // siblings do); the label-index extractor has its own test that uses a label-index key.
  ASSERT_EQ(memgraph::utils::ExtractGidFromMainDiskStorage(serializedVertex), "1");
}
// ExtractGidFromLabelIndexStorage must return "1" for a label-index key built from label 2 and gid 1.
TEST(RocksDbSerDeSuite, ExtractGidFromLabelIndexStorageKey) {
  LabelId indexedLabel = LabelId::FromInt(2);
  auto vertexGid = Gid::FromInt(1);

  std::string indexKey = memgraph::utils::SerializeVertexAsKeyForLabelIndex(indexedLabel, vertexGid);

  ASSERT_EQ(memgraph::utils::ExtractGidFromLabelIndexStorage(indexKey), "1");
}
// Properties serialized into a label-index value must deserialize to a store whose string buffer
// equals that of the original property store.
TEST(RocksDbSerDeSuite, DeserializePropertiesFromLabelIndexStorage) {
  LabelId indexingLabel = LabelId::FromInt(2);
  std::vector<LabelId> labels = {LabelId::FromInt(3), LabelId::FromInt(4)};
  std::map<PropertyId, PropertyValue> properties = {{PropertyId::FromInt(5), PropertyValue("5")},
                                                    {PropertyId::FromInt(6), PropertyValue("6")}};
  PropertyStore propertyStore;
  propertyStore.InitProperties(properties);

  std::string serializedVertex =
      memgraph::utils::SerializeVertexAsValueForLabelIndex(indexingLabel, labels, propertyStore);

  ASSERT_EQ(memgraph::utils::DeserializePropertiesFromLabelIndexStorage(serializedVertex).StringBuffer(),
            propertyStore.StringBuffer());
}
// Properties serialized into a label-property-index value must deserialize to a store whose string
// buffer equals that of the original property store.
TEST(RocksDbSerDeSuite, DeserializePropertiesFromLabelPropertyIndexStorage) {
  LabelId indexingLabel = LabelId::FromInt(2);
  std::vector<LabelId> labels = {LabelId::FromInt(3), LabelId::FromInt(4)};
  std::map<PropertyId, PropertyValue> properties = {{PropertyId::FromInt(5), PropertyValue("5")},
                                                    {PropertyId::FromInt(6), PropertyValue("6")}};
  PropertyStore propertyStore;
  propertyStore.InitProperties(properties);

  std::string serializedVertex =
      memgraph::utils::SerializeVertexAsValueForLabelPropertyIndex(indexingLabel, labels, propertyStore);

  ASSERT_EQ(memgraph::utils::DeserializePropertiesFromLabelPropertyIndexStorage(serializedVertex).StringBuffer(),
            propertyStore.StringBuffer());
}
// ExtractGidFromLabelPropertyIndexStorage must return "1" for a label-property-index key built
// from label 2, property 3 and gid 1.
TEST(RocksDbSerDeSuite, ExtractGidFromLabelPropertyIndexStorageKey) {
  LabelId indexedLabel = LabelId::FromInt(2);
  PropertyId indexedProperty = PropertyId::FromInt(3);
  auto vertexGid = Gid::FromInt(1);

  std::string indexKey =
      memgraph::utils::SerializeVertexAsKeyForLabelPropertyIndex(indexedLabel, indexedProperty, vertexGid);

  ASSERT_EQ(memgraph::utils::ExtractGidFromLabelPropertyIndexStorage(indexKey), "1");
}
// The gid string passed into a unique-constraint key must come back unchanged from
// ExtractGidFromUniqueConstraintStorage.
TEST(RocksDbSerDeSuite, ExtractGidFromUniqueConstraintStorageKey) {
  std::string wantedGid = "1";
  std::set<PropertyId> constrainedProperties = {PropertyId::FromInt(3), PropertyId::FromInt(4)};
  LabelId labelUnderConstraint = LabelId::FromInt(2);

  std::string constraintKey =
      memgraph::utils::SerializeVertexAsKeyForUniqueConstraint(labelUnderConstraint, constrainedProperties, wantedGid);

  ASSERT_EQ(memgraph::utils::ExtractGidFromUniqueConstraintStorage(constraintKey), wantedGid);
}
// The label passed into a unique-constraint key must come back unchanged from
// DeserializeConstraintLabelFromUniqueConstraintStorage.
TEST(RocksDbSerDeSuite, DeserializeConstraintLabelFromUniqueConstraintStorage) {
  std::string vertexGid = "1";
  std::set<PropertyId> constrainedProperties = {PropertyId::FromInt(3), PropertyId::FromInt(4)};
  LabelId labelUnderConstraint = LabelId::FromInt(2);

  std::string constraintKey =
      memgraph::utils::SerializeVertexAsKeyForUniqueConstraint(labelUnderConstraint, constrainedProperties, vertexGid);

  ASSERT_EQ(memgraph::utils::DeserializeConstraintLabelFromUniqueConstraintStorage(constraintKey),
            labelUnderConstraint);
}
// Properties serialized into a unique-constraint value must deserialize to a store whose string
// buffer equals that of the original property store.
TEST(RocksDbSerDeSuite, DeserializePropertiesFromUniqueConstraintStorage) {
  LabelId constraintLabel = LabelId::FromInt(2);
  std::vector<LabelId> labels = {LabelId::FromInt(3), LabelId::FromInt(4)};
  std::map<PropertyId, PropertyValue> properties = {{PropertyId::FromInt(5), PropertyValue("5")},
                                                    {PropertyId::FromInt(6), PropertyValue("6")}};
  PropertyStore propertyStore;
  propertyStore.InitProperties(properties);

  std::string serializedVertex =
      memgraph::utils::SerializeVertexAsValueForUniqueConstraint(constraintLabel, labels, propertyStore);

  ASSERT_EQ(memgraph::utils::DeserializePropertiesFromUniqueConstraintStorage(serializedVertex).StringBuffer(),
            propertyStore.StringBuffer());
}