Optimize splitting keys inside the on-disk storage (#1155)

This commit is contained in:
Aidar Samerkhanov 2023-08-17 19:09:21 +03:00 committed by GitHub
parent 8f3f693f20
commit 3bf2cf65ab
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 248 additions and 39 deletions

View File

@ -141,7 +141,7 @@ declare -A secondary_urls=(
["rocksdb"]="https://github.com/facebook/rocksdb.git" ["rocksdb"]="https://github.com/facebook/rocksdb.git"
["mgclient"]="https://github.com/memgraph/mgclient.git" ["mgclient"]="https://github.com/memgraph/mgclient.git"
["pymgclient"]="https://github.com/memgraph/pymgclient.git" ["pymgclient"]="https://github.com/memgraph/pymgclient.git"
["mgconsole"]="http://github.com/memgraph/mgconsole.git" ["mgconsole"]="https://github.com/memgraph/mgconsole.git"
["spdlog"]="https://github.com/gabime/spdlog" ["spdlog"]="https://github.com/gabime/spdlog"
["nlohmann"]="https://raw.githubusercontent.com/nlohmann/json/4f8fba14066156b73f1189a2b8bd568bde5284c5/single_include/nlohmann/json.hpp" ["nlohmann"]="https://raw.githubusercontent.com/nlohmann/json/4f8fba14066156b73f1189a2b8bd568bde5284c5/single_include/nlohmann/json.hpp"
["neo4j"]="https://dist.neo4j.org/neo4j-community-5.6.0-unix.tar.gz" ["neo4j"]="https://dist.neo4j.org/neo4j-community-5.6.0-unix.tar.gz"

View File

@ -11,6 +11,7 @@
#pragma once #pragma once
#include <charconv>
#include <cstdint> #include <cstdint>
#include <iomanip> #include <iomanip>
#include <iterator> #include <iterator>
@ -30,6 +31,47 @@ namespace memgraph::utils {
static constexpr const char *outEdgeDirection = "0"; static constexpr const char *outEdgeDirection = "0";
static constexpr const char *inEdgeDirection = "1"; static constexpr const char *inEdgeDirection = "1";
// NOTE: these helpers are defined in a header and are called from inline functions with external
// linkage. They are therefore plain `inline` entities and deliberately NOT wrapped in an unnamed
// namespace: an unnamed namespace would give every translation unit its own private copy, which makes
// the calling inline functions differ between translation units (an ODR violation).

/// Half-open character range [start, end) inside a string that is being split.
struct StartEndPositions {
  size_t start;
  size_t end;

  /// Number of characters covered by the range.
  size_t Size() const { return end - start; }

  /// The range is usable when its start was not invalidated (set to npos) and does not lie past its end.
  bool Valid() const { return start != std::string_view::npos && start <= end; }
};

/// Returns a non-owning view of the `partNumber`-th (1-based) part of `str` when split on `delim`.
///
/// No allocation is performed; the returned view points into `str`, so it is only valid while the
/// underlying buffer is alive.
///
/// @param str string to split.
/// @param delim single character separating the parts.
/// @param partNumber 1-based index of the requested part; a value of 0 yields an empty view.
/// @return view of the requested part. When `str` contains fewer than `partNumber` parts, the whole
///         `str` is returned unchanged.
template <typename T>
inline std::string_view FindPartOfStringView(const std::string_view str, const char delim, T partNumber) {
  StartEndPositions startEndPos{0, 0};
  for (T i = 0; i < partNumber; ++i) {
    startEndPos.start = startEndPos.end;
    startEndPos.end = str.find(delim, startEndPos.start);
    // For the last requested part `end` may stay npos: substr() clamps the length to the end of `str`.
    const bool isLastRequestedPart = !(i + 1 < partNumber);
    if (isLastRequestedPart) {
      continue;
    }
    if (startEndPos.end == std::string_view::npos) {
      // We didn't find enough parts.
      startEndPos.start = std::string_view::npos;
      break;
    }
    // Step over the delimiter so that the next part starts right after it.
    ++startEndPos.end;
  }
  return startEndPos.Valid() ? str.substr(startEndPos.start, startEndPos.Size()) : str;
}

/// View of everything before the first `delimiter` (or all of `src` when there is no delimiter).
inline std::string_view GetViewOfFirstPartOfSplit(const std::string_view src, const char delimiter) {
  return FindPartOfStringView(src, delimiter, 1);
}

/// View of the text between the first and second `delimiter`; all of `src` when there is no second part.
inline std::string_view GetViewOfSecondPartOfSplit(const std::string_view src, const char delimiter) {
  return FindPartOfStringView(src, delimiter, 2);
}

/// View of the text between the second and third `delimiter`; all of `src` when there is no third part.
inline std::string_view GetViewOfThirdPartOfSplit(const std::string_view src, const char delimiter) {
  return FindPartOfStringView(src, delimiter, 3);
}
/// TODO: try to move this to hpp files so that we can follow jump on readings /// TODO: try to move this to hpp files so that we can follow jump on readings
inline std::string SerializeIdType(const auto &id) { return std::to_string(id.AsUint()); } inline std::string SerializeIdType(const auto &id) { return std::to_string(id.AsUint()); }
@ -111,13 +153,11 @@ inline std::string SerializeVertexAsValueForAuxiliaryStorages(storage::LabelId l
} }
inline std::string ExtractGidFromKey(const std::string &key) { inline std::string ExtractGidFromKey(const std::string &key) {
std::vector<std::string> key_vector = utils::Split(key, "|"); return std::string(GetViewOfSecondPartOfSplit(key, '|'));
return key_vector[1];
} }
inline storage::PropertyStore DeserializePropertiesFromAuxiliaryStorages(const std::string &value) { inline storage::PropertyStore DeserializePropertiesFromAuxiliaryStorages(const std::string &value) {
std::vector<std::string> value_vector = utils::Split(value, "|"); const std::string_view properties_str = GetViewOfSecondPartOfSplit(value, '|');
std::string properties_str = value_vector[1];
return storage::PropertyStore::CreateFromBuffer(properties_str); return storage::PropertyStore::CreateFromBuffer(properties_str);
} }
@ -136,9 +176,7 @@ inline std::vector<storage::LabelId> DeserializeLabelsFromMainDiskStorage(const
} }
inline std::vector<std::string> ExtractLabelsFromMainDiskStorage(const std::string &key) { inline std::vector<std::string> ExtractLabelsFromMainDiskStorage(const std::string &key) {
std::vector<std::string> key_vector = utils::Split(key, "|"); return utils::Split(GetViewOfFirstPartOfSplit(key, '|'), ",");
std::string labels_str = key_vector[0];
return utils::Split(labels_str, ",");
} }
inline storage::PropertyStore DeserializePropertiesFromMainDiskStorage(const std::string_view value) { inline storage::PropertyStore DeserializePropertiesFromMainDiskStorage(const std::string_view value) {
@ -167,10 +205,16 @@ inline std::string SerializeVertexAsValueForUniqueConstraint(const storage::Labe
} }
inline storage::LabelId DeserializeConstraintLabelFromUniqueConstraintStorage(const std::string &key) { inline storage::LabelId DeserializeConstraintLabelFromUniqueConstraintStorage(const std::string &key) {
std::vector<std::string> key_vector = utils::Split(key, "|"); const std::string_view firstPartKey = GetViewOfFirstPartOfSplit(key, '|');
std::vector<std::string> constraint_key = utils::Split(key_vector[0], ","); const std::string_view constraint_key = GetViewOfFirstPartOfSplit(firstPartKey, ',');
/// TODO: andi Change this to deserialization method directly into the LabelId class /// TODO: andi Change this to deserialization method directly into the LabelId class
return storage::LabelId::FromUint(std::stoull(constraint_key[0])); uint64_t labelID = 0;
const char *endOfConstraintKey = constraint_key.data() + constraint_key.size();
auto [ptr, ec] = std::from_chars(constraint_key.data(), endOfConstraintKey, labelID);
if (ec != std::errc() || ptr != endOfConstraintKey) {
throw std::invalid_argument("Failed to deserialize label id from unique constraint storage");
}
return storage::LabelId::FromUint(labelID);
} }
inline storage::PropertyStore DeserializePropertiesFromUniqueConstraintStorage(const std::string &value) { inline storage::PropertyStore DeserializePropertiesFromUniqueConstraintStorage(const std::string &value) {
@ -225,8 +269,7 @@ inline std::string SerializeVertexAsValueForLabelPropertyIndex(storage::LabelId
} }
inline std::string ExtractGidFromLabelPropertyIndexStorage(const std::string &key) { inline std::string ExtractGidFromLabelPropertyIndexStorage(const std::string &key) {
std::vector<std::string> key_vector = utils::Split(key, "|"); return std::string(GetViewOfThirdPartOfSplit(key, '|'));
return key_vector[2];
} }
inline std::vector<storage::LabelId> DeserializeLabelsFromLabelPropertyIndexStorage(const std::string &value) { inline std::vector<storage::LabelId> DeserializeLabelsFromLabelPropertyIndexStorage(const std::string &value) {

View File

@ -1,4 +1,4 @@
// Copyright 2022 Memgraph Ltd. // Copyright 2023 Memgraph Ltd.
// //
// Use of this software is governed by the Business Source License // Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source // included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source

View File

@ -30,6 +30,9 @@
#include "storage/v2/view.hpp" #include "storage/v2/view.hpp"
#include "utils/rocksdb_serialization.hpp" #include "utils/rocksdb_serialization.hpp"
// NOLINTNEXTLINE(google-build-using-namespace)
using namespace memgraph::storage;
/* Tests that serialization of vertices and edges to RocksDB works correctly. /* Tests that serialization of vertices and edges to RocksDB works correctly.
*/ */
class RocksDBStorageTest : public ::testing::TestWithParam<bool> { class RocksDBStorageTest : public ::testing::TestWithParam<bool> {
@ -38,7 +41,7 @@ class RocksDBStorageTest : public ::testing::TestWithParam<bool> {
RocksDBStorageTest() { RocksDBStorageTest() {
config_ = disk_test_utils::GenerateOnDiskConfig(testSuite); config_ = disk_test_utils::GenerateOnDiskConfig(testSuite);
storage = std::make_unique<memgraph::storage::DiskStorage>(config_); storage = std::make_unique<DiskStorage>(config_);
} }
void TearDown() override { void TearDown() override {
@ -49,8 +52,8 @@ class RocksDBStorageTest : public ::testing::TestWithParam<bool> {
~RocksDBStorageTest() override {} ~RocksDBStorageTest() override {}
protected: protected:
std::unique_ptr<memgraph::storage::Storage> storage; std::unique_ptr<Storage> storage;
memgraph::storage::Config config_; Config config_;
}; };
TEST_F(RocksDBStorageTest, SerializeVertexGID) { TEST_F(RocksDBStorageTest, SerializeVertexGID) {
@ -74,22 +77,21 @@ TEST_F(RocksDBStorageTest, SerializeVertexGIDLabels) {
} }
TEST_F(RocksDBStorageTest, SerializePropertiesLocalBuffer) { TEST_F(RocksDBStorageTest, SerializePropertiesLocalBuffer) {
memgraph::storage::PropertyStore props; PropertyStore props;
auto id = memgraph::storage::PropertyId::FromInt(0); auto id = PropertyId::FromInt(0);
auto id_value = memgraph::storage::PropertyValue(1); auto id_value = PropertyValue(1);
auto completion_percentage = memgraph::storage::PropertyId::FromInt(1); auto completion_percentage = PropertyId::FromInt(1);
auto completion_percentage_value = memgraph::storage::PropertyValue(14); auto completion_percentage_value = PropertyValue(14);
auto gender = memgraph::storage::PropertyId::FromInt(2); auto gender = PropertyId::FromInt(2);
auto gender_value = memgraph::storage::PropertyValue("man"); auto gender_value = PropertyValue("man");
auto age = memgraph::storage::PropertyId::FromInt(3); auto age = PropertyId::FromInt(3);
auto age_value = memgraph::storage::PropertyValue(26); auto age_value = PropertyValue(26);
ASSERT_TRUE(props.SetProperty(id, id_value)); ASSERT_TRUE(props.SetProperty(id, id_value));
ASSERT_TRUE(props.SetProperty(age, age_value)); ASSERT_TRUE(props.SetProperty(age, age_value));
ASSERT_TRUE(props.SetProperty(completion_percentage, completion_percentage_value)); ASSERT_TRUE(props.SetProperty(completion_percentage, completion_percentage_value));
ASSERT_TRUE(props.SetProperty(gender, gender_value)); ASSERT_TRUE(props.SetProperty(gender, gender_value));
std::string serialized_props = memgraph::utils::SerializeProperties(props); std::string serialized_props = memgraph::utils::SerializeProperties(props);
memgraph::storage::PropertyStore deserialized_props = PropertyStore deserialized_props = PropertyStore::CreateFromBuffer(serialized_props);
memgraph::storage::PropertyStore::CreateFromBuffer(serialized_props);
for (const auto &[prop_id, prop_value] : props.Properties()) { for (const auto &[prop_id, prop_value] : props.Properties()) {
ASSERT_TRUE(deserialized_props.IsPropertyEqual(prop_id, prop_value)); ASSERT_TRUE(deserialized_props.IsPropertyEqual(prop_id, prop_value));
@ -97,25 +99,189 @@ TEST_F(RocksDBStorageTest, SerializePropertiesLocalBuffer) {
} }
TEST_F(RocksDBStorageTest, SerializePropertiesExternalBuffer) { TEST_F(RocksDBStorageTest, SerializePropertiesExternalBuffer) {
memgraph::storage::PropertyStore props; PropertyStore props;
auto id = memgraph::storage::PropertyId::FromInt(0); auto id = PropertyId::FromInt(0);
auto id_value = memgraph::storage::PropertyValue(1); auto id_value = PropertyValue(1);
auto completion_percentage = memgraph::storage::PropertyId::FromInt(1); auto completion_percentage = PropertyId::FromInt(1);
auto completion_percentage_value = memgraph::storage::PropertyValue(14); auto completion_percentage_value = PropertyValue(14);
auto gender = memgraph::storage::PropertyId::FromInt(2); auto gender = PropertyId::FromInt(2);
// Use big value so that memory for properties is allocated on the heap not on the stack // Use big value so that memory for properties is allocated on the heap not on the stack
auto gender_value = memgraph::storage::PropertyValue("man167863816386826"); auto gender_value = PropertyValue("man167863816386826");
auto age = memgraph::storage::PropertyId::FromInt(3); auto age = PropertyId::FromInt(3);
auto age_value = memgraph::storage::PropertyValue(26); auto age_value = PropertyValue(26);
ASSERT_TRUE(props.SetProperty(id, id_value)); ASSERT_TRUE(props.SetProperty(id, id_value));
ASSERT_TRUE(props.SetProperty(age, age_value)); ASSERT_TRUE(props.SetProperty(age, age_value));
ASSERT_TRUE(props.SetProperty(completion_percentage, completion_percentage_value)); ASSERT_TRUE(props.SetProperty(completion_percentage, completion_percentage_value));
ASSERT_TRUE(props.SetProperty(gender, gender_value)); ASSERT_TRUE(props.SetProperty(gender, gender_value));
std::string serialized_props = memgraph::utils::SerializeProperties(props); std::string serialized_props = memgraph::utils::SerializeProperties(props);
memgraph::storage::PropertyStore deserialized_props = PropertyStore deserialized_props = PropertyStore::CreateFromBuffer(serialized_props);
memgraph::storage::PropertyStore::CreateFromBuffer(serialized_props);
for (const auto &[prop_id, prop_value] : props.Properties()) { for (const auto &[prop_id, prop_value] : props.Properties()) {
ASSERT_TRUE(deserialized_props.IsPropertyEqual(prop_id, prop_value)); ASSERT_TRUE(deserialized_props.IsPropertyEqual(prop_id, prop_value));
} }
} }
// A vertex that carries no labels must still yield its gid from the serialized vertex string.
TEST(RocksDbSerDeSuite, ExtractVertexGidFromVertexKeyNoLabels) {
  Vertex unlabeled_vertex(Gid::FromInt(1), nullptr);
  const std::string key = memgraph::utils::SerializeVertex(unlabeled_vertex);
  ASSERT_EQ(memgraph::utils::ExtractGidFromKey(key), "1");
}
// A single label on the vertex must not change the gid extracted from the serialized vertex string.
TEST(RocksDbSerDeSuite, ExtractVertexGidFromVertexKeyWithOneLabel) {
  Vertex labeled_vertex(Gid::FromInt(1), nullptr);
  labeled_vertex.labels.push_back(LabelId::FromInt(2));
  const std::string key = memgraph::utils::SerializeVertex(labeled_vertex);
  ASSERT_EQ(memgraph::utils::ExtractGidFromKey(key), "1");
}
// Several labels on the vertex must not change the gid extracted from the serialized vertex string.
TEST(RocksDbSerDeSuite, ExtractVertexGidFromVertexKeyWithMultipleLabels) {
  Vertex labeled_vertex(Gid::FromInt(1), nullptr);
  for (const unsigned label_value : {2U, 3U, 4U}) {
    labeled_vertex.labels.push_back(LabelId::FromInt(label_value));
  }
  const std::string key = memgraph::utils::SerializeVertex(labeled_vertex);
  ASSERT_EQ(memgraph::utils::ExtractGidFromKey(key), "1");
}
// The label list extracted from a serialized one-label vertex must contain exactly that label as text.
TEST(RocksDbSerDeSuite, ExtractLabelsFromMainDiskStorageWhenOnlyOneLabel) {
  const std::vector<std::string> expected_labels = {"2"};
  Vertex labeled_vertex(Gid::FromInt(1), nullptr);
  for (const unsigned label_value : {2U}) {
    labeled_vertex.labels.push_back(LabelId::FromInt(label_value));
  }
  const std::string key = memgraph::utils::SerializeVertex(labeled_vertex);
  ASSERT_EQ(memgraph::utils::ExtractLabelsFromMainDiskStorage(key), expected_labels);
}
// The label list extracted from a serialized multi-label vertex must list every label, in insertion order.
TEST(RocksDbSerDeSuite, ExtractLabelsFromMainDiskStorageWhenMultipleLabels) {
  const std::vector<std::string> expected_labels = {"2", "3", "4"};
  Vertex labeled_vertex(Gid::FromInt(1), nullptr);
  for (const unsigned label_value : {2U, 3U, 4U}) {
    labeled_vertex.labels.push_back(LabelId::FromInt(label_value));
  }
  const std::string key = memgraph::utils::SerializeVertex(labeled_vertex);
  ASSERT_EQ(memgraph::utils::ExtractLabelsFromMainDiskStorage(key), expected_labels);
}
// ExtractGidFromMainDiskStorage must return the gid of a serialized vertex that has no labels.
TEST(RocksDbSerDeSuite, ExtractVertexGidFromMainDiskStorageNoLabels) {
  Vertex unlabeled_vertex(Gid::FromInt(1), nullptr);
  const std::string key = memgraph::utils::SerializeVertex(unlabeled_vertex);
  ASSERT_EQ(memgraph::utils::ExtractGidFromMainDiskStorage(key), "1");
}
// ExtractGidFromMainDiskStorage must return the gid of a serialized vertex that has one label.
TEST(RocksDbSerDeSuite, ExtractVertexGidFromMainDiskStorageWithOneLabel) {
  Vertex labeled_vertex(Gid::FromInt(1), nullptr);
  labeled_vertex.labels.push_back(LabelId::FromInt(2));
  const std::string key = memgraph::utils::SerializeVertex(labeled_vertex);
  ASSERT_EQ(memgraph::utils::ExtractGidFromMainDiskStorage(key), "1");
}
// ExtractGidFromMainDiskStorage must return the gid of a serialized vertex that has several labels.
TEST(RocksDbSerDeSuite, ExtractVertexGidFromMainDiskStorageWithMultipleLabels) {
  auto gid = Gid::FromInt(1);
  Vertex vertex(gid, nullptr);
  vertex.labels.push_back(LabelId::FromInt(2));
  vertex.labels.push_back(LabelId::FromInt(3));
  vertex.labels.push_back(LabelId::FromInt(4));
  std::string serializedVertex = memgraph::utils::SerializeVertex(vertex);
  // Exercise the main-disk-storage extractor named by this test (as the NoLabels/WithOneLabel siblings
  // do), not the label-index extractor that has its own dedicated test.
  ASSERT_EQ(memgraph::utils::ExtractGidFromMainDiskStorage(serializedVertex), "1");
}
// The gid written into a label-index key must be recoverable with ExtractGidFromLabelIndexStorage.
TEST(RocksDbSerDeSuite, ExtractGidFromLabelIndexStorageKey) {
  const std::string index_key =
      memgraph::utils::SerializeVertexAsKeyForLabelIndex(LabelId::FromInt(2), Gid::FromInt(1));
  ASSERT_EQ(memgraph::utils::ExtractGidFromLabelIndexStorage(index_key), "1");
}
// Properties serialized into a label-index value must deserialize to an identical property buffer.
TEST(RocksDbSerDeSuite, DeserializePropertiesFromLabelIndexStorage) {
  LabelId indexingLabel = LabelId::FromInt(2);
  std::vector<LabelId> labels = {LabelId::FromInt(3), LabelId::FromInt(4)};
  std::map<PropertyId, PropertyValue> properties = {{PropertyId::FromInt(5), PropertyValue("5")},
                                                    {PropertyId::FromInt(6), PropertyValue("6")}};
  PropertyStore propertyStore;
  propertyStore.InitProperties(properties);

  std::string serializedVertex =
      memgraph::utils::SerializeVertexAsValueForLabelIndex(indexingLabel, labels, propertyStore);
  // Compare the raw buffers: a lossless round trip must reproduce the original encoding.
  ASSERT_EQ(memgraph::utils::DeserializePropertiesFromLabelIndexStorage(serializedVertex).StringBuffer(),
            propertyStore.StringBuffer());
}
// Properties serialized into a label-property-index value must deserialize to an identical property buffer.
TEST(RocksDbSerDeSuite, DeserializePropertiesFromLabelPropertyIndexStorage) {
  LabelId indexingLabel = LabelId::FromInt(2);
  std::vector<LabelId> labels = {LabelId::FromInt(3), LabelId::FromInt(4)};
  std::map<PropertyId, PropertyValue> properties = {{PropertyId::FromInt(5), PropertyValue("5")},
                                                    {PropertyId::FromInt(6), PropertyValue("6")}};
  PropertyStore propertyStore;
  propertyStore.InitProperties(properties);

  std::string serializedVertex =
      memgraph::utils::SerializeVertexAsValueForLabelPropertyIndex(indexingLabel, labels, propertyStore);
  // Compare the raw buffers: a lossless round trip must reproduce the original encoding.
  ASSERT_EQ(memgraph::utils::DeserializePropertiesFromLabelPropertyIndexStorage(serializedVertex).StringBuffer(),
            propertyStore.StringBuffer());
}
// The gid written into a label-property-index key must be recoverable with
// ExtractGidFromLabelPropertyIndexStorage.
TEST(RocksDbSerDeSuite, ExtractGidFromLabelPropertyIndexStorageKey) {
  const std::string index_key = memgraph::utils::SerializeVertexAsKeyForLabelPropertyIndex(
      LabelId::FromInt(2), PropertyId::FromInt(3), Gid::FromInt(1));
  ASSERT_EQ(memgraph::utils::ExtractGidFromLabelPropertyIndexStorage(index_key), "1");
}
// The gid passed when building a unique-constraint key must be recoverable with
// ExtractGidFromUniqueConstraintStorage.
TEST(RocksDbSerDeSuite, ExtractGidFromUniqueConstraintStorageKey) {
  std::string gid_text = "1";
  std::set<PropertyId> constrained_properties = {PropertyId::FromInt(3), PropertyId::FromInt(4)};
  LabelId constrained_label = LabelId::FromInt(2);

  const std::string constraint_key =
      memgraph::utils::SerializeVertexAsKeyForUniqueConstraint(constrained_label, constrained_properties, gid_text);
  ASSERT_EQ(memgraph::utils::ExtractGidFromUniqueConstraintStorage(constraint_key), gid_text);
}
// The constraint label used to build a unique-constraint key must be recoverable with
// DeserializeConstraintLabelFromUniqueConstraintStorage.
TEST(RocksDbSerDeSuite, DeserializeConstraintLabelFromUniqueConstraintStorage) {
  std::string gid_text = "1";
  std::set<PropertyId> constrained_properties = {PropertyId::FromInt(3), PropertyId::FromInt(4)};
  LabelId constrained_label = LabelId::FromInt(2);

  const std::string constraint_key =
      memgraph::utils::SerializeVertexAsKeyForUniqueConstraint(constrained_label, constrained_properties, gid_text);
  ASSERT_EQ(memgraph::utils::DeserializeConstraintLabelFromUniqueConstraintStorage(constraint_key),
            constrained_label);
}
// Properties serialized into a unique-constraint value must deserialize to an identical property buffer.
TEST(RocksDbSerDeSuite, DeserializePropertiesFromUniqueConstraintStorage) {
  LabelId constraintLabel = LabelId::FromInt(2);
  std::vector<LabelId> labels = {LabelId::FromInt(3), LabelId::FromInt(4)};
  std::map<PropertyId, PropertyValue> properties = {{PropertyId::FromInt(5), PropertyValue("5")},
                                                    {PropertyId::FromInt(6), PropertyValue("6")}};
  PropertyStore propertyStore;
  propertyStore.InitProperties(properties);

  std::string serializedVertex =
      memgraph::utils::SerializeVertexAsValueForUniqueConstraint(constraintLabel, labels, propertyStore);
  // Compare the raw buffers: a lossless round trip must reproduce the original encoding.
  ASSERT_EQ(memgraph::utils::DeserializePropertiesFromUniqueConstraintStorage(serializedVertex).StringBuffer(),
            propertyStore.StringBuffer());
}