Optimize splitting keys inside the on-disk storage (#1155)
This commit is contained in:
parent
8f3f693f20
commit
3bf2cf65ab
@ -141,7 +141,7 @@ declare -A secondary_urls=(
|
||||
["rocksdb"]="https://github.com/facebook/rocksdb.git"
|
||||
["mgclient"]="https://github.com/memgraph/mgclient.git"
|
||||
["pymgclient"]="https://github.com/memgraph/pymgclient.git"
|
||||
["mgconsole"]="http://github.com/memgraph/mgconsole.git"
|
||||
["mgconsole"]="https://github.com/memgraph/mgconsole.git"
|
||||
["spdlog"]="https://github.com/gabime/spdlog"
|
||||
["nlohmann"]="https://raw.githubusercontent.com/nlohmann/json/4f8fba14066156b73f1189a2b8bd568bde5284c5/single_include/nlohmann/json.hpp"
|
||||
["neo4j"]="https://dist.neo4j.org/neo4j-community-5.6.0-unix.tar.gz"
|
||||
|
@ -11,6 +11,7 @@
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <charconv>
|
||||
#include <cstdint>
|
||||
#include <iomanip>
|
||||
#include <iterator>
|
||||
@ -30,6 +31,47 @@ namespace memgraph::utils {
|
||||
static constexpr const char *outEdgeDirection = "0";
|
||||
static constexpr const char *inEdgeDirection = "1";
|
||||
|
||||
namespace {
|
||||
/// Pair of character positions delimiting a range inside a string; `end` is exclusive.
struct StartEndPositions {
  /// Position of the first character of the range; `std::string::npos` marks "no range".
  size_t start{0};
  /// Position one past the last character of the range.
  size_t end{0};

  /// Number of characters in the range. Only meaningful when `Valid()` holds: the subtraction is unsigned
  /// and wraps around if `start > end`.
  size_t Size() const { return end - start; }

  /// True when `start` is a real position (not `npos`) and does not lie past `end`.
  bool Valid() const { return start != std::string::npos && start <= end; }
};
|
||||
|
||||
/// Returns a view of the `partNumber`-th (1-based) `delim`-separated part of `str` without allocating.
///
/// @param str        text to split; the returned view points into the same characters.
/// @param delim      single-character separator.
/// @param partNumber 1-based index of the wanted part.
/// @return the requested part (possibly empty); an empty view when `partNumber` is less than 1; the whole
///         `str` when it contains fewer than `partNumber` parts.
template <typename T>
inline std::string_view FindPartOfStringView(const std::string_view str, const char delim, T partNumber) {
  if (partNumber < 1) {
    return str.substr(0, 0);
  }

  size_t partStart = 0;
  // The counter has the same type as `partNumber`, so the comparison never mixes signedness or width.
  for (T part = 1; part < partNumber; ++part) {
    const size_t delimPos = str.find(delim, partStart);
    if (delimPos == std::string_view::npos) {
      // We didn't find enough parts.
      return str;
    }
    partStart = delimPos + 1;
  }

  // For the last part `find` yields npos; substr clamps the resulting length to the end of `str`.
  const size_t partEnd = str.find(delim, partStart);
  return str.substr(partStart, partEnd - partStart);
}
|
||||
|
||||
inline std::string_view GetViewOfFirstPartOfSplit(const std::string_view src, const char delimiter) {
|
||||
return FindPartOfStringView(src, delimiter, 1);
|
||||
}
|
||||
|
||||
inline std::string_view GetViewOfSecondPartOfSplit(const std::string_view src, const char delimiter) {
|
||||
return FindPartOfStringView(src, delimiter, 2);
|
||||
}
|
||||
|
||||
inline std::string_view GetViewOfThirdPartOfSplit(const std::string_view src, const char delimiter) {
|
||||
return FindPartOfStringView(src, delimiter, 3);
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
/// TODO: try to move this to hpp files so that we can follow jump on readings
|
||||
|
||||
/// Serializes an id as the decimal text of the value returned by its `AsUint()` member.
template <typename TId>
inline std::string SerializeIdType(const TId &id) {
  return std::to_string(id.AsUint());
}
|
||||
@ -111,13 +153,11 @@ inline std::string SerializeVertexAsValueForAuxiliaryStorages(storage::LabelId l
|
||||
}
|
||||
|
||||
inline std::string ExtractGidFromKey(const std::string &key) {
|
||||
std::vector<std::string> key_vector = utils::Split(key, "|");
|
||||
return key_vector[1];
|
||||
return std::string(GetViewOfSecondPartOfSplit(key, '|'));
|
||||
}
|
||||
|
||||
/// Builds a PropertyStore from the second '|'-separated part of `value`.
/// NOTE(review): the view handed to CreateFromBuffer stops at the next '|' after the first one; confirm
/// the serialized properties can never contain that byte, otherwise the buffer is truncated.
inline storage::PropertyStore DeserializePropertiesFromAuxiliaryStorages(const std::string &value) {
  // The view points into `value`; nothing is copied before CreateFromBuffer consumes it.
  const std::string_view properties_str = GetViewOfSecondPartOfSplit(value, '|');
  return storage::PropertyStore::CreateFromBuffer(properties_str);
}
|
||||
|
||||
@ -136,9 +176,7 @@ inline std::vector<storage::LabelId> DeserializeLabelsFromMainDiskStorage(const
|
||||
}
|
||||
|
||||
inline std::vector<std::string> ExtractLabelsFromMainDiskStorage(const std::string &key) {
|
||||
std::vector<std::string> key_vector = utils::Split(key, "|");
|
||||
std::string labels_str = key_vector[0];
|
||||
return utils::Split(labels_str, ",");
|
||||
return utils::Split(GetViewOfFirstPartOfSplit(key, '|'), ",");
|
||||
}
|
||||
|
||||
inline storage::PropertyStore DeserializePropertiesFromMainDiskStorage(const std::string_view value) {
|
||||
@ -167,10 +205,16 @@ inline std::string SerializeVertexAsValueForUniqueConstraint(const storage::Labe
|
||||
}
|
||||
|
||||
/// Parses the label id stored at the very beginning of `key`: the text before the first ',' inside the
/// text before the first '|'.
///
/// @throws std::invalid_argument when that text is not, in its entirety, a decimal unsigned integer that
///         fits into uint64_t.
inline storage::LabelId DeserializeConstraintLabelFromUniqueConstraintStorage(const std::string &key) {
  const std::string_view firstPartKey = GetViewOfFirstPartOfSplit(key, '|');
  const std::string_view constraint_key = GetViewOfFirstPartOfSplit(firstPartKey, ',');
  /// TODO: andi Change this to deserialization method directly into the LabelId class
  uint64_t labelID = 0;
  const char *endOfConstraintKey = constraint_key.data() + constraint_key.size();
  auto [ptr, ec] = std::from_chars(constraint_key.data(), endOfConstraintKey, labelID);
  // Reject both conversion errors and trailing characters that from_chars left unconsumed.
  if (ec != std::errc() || ptr != endOfConstraintKey) {
    throw std::invalid_argument("Failed to deserialize label id from unique constraint storage");
  }
  return storage::LabelId::FromUint(labelID);
}
|
||||
|
||||
inline storage::PropertyStore DeserializePropertiesFromUniqueConstraintStorage(const std::string &value) {
|
||||
@ -225,8 +269,7 @@ inline std::string SerializeVertexAsValueForLabelPropertyIndex(storage::LabelId
|
||||
}
|
||||
|
||||
inline std::string ExtractGidFromLabelPropertyIndexStorage(const std::string &key) {
|
||||
std::vector<std::string> key_vector = utils::Split(key, "|");
|
||||
return key_vector[2];
|
||||
return std::string(GetViewOfThirdPartOfSplit(key, '|'));
|
||||
}
|
||||
|
||||
inline std::vector<storage::LabelId> DeserializeLabelsFromLabelPropertyIndexStorage(const std::string &value) {
|
||||
|
@ -1,4 +1,4 @@
|
||||
// Copyright 2022 Memgraph Ltd.
|
||||
// Copyright 2023 Memgraph Ltd.
|
||||
//
|
||||
// Use of this software is governed by the Business Source License
|
||||
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
||||
|
@ -30,6 +30,9 @@
|
||||
#include "storage/v2/view.hpp"
|
||||
#include "utils/rocksdb_serialization.hpp"
|
||||
|
||||
// NOLINTNEXTLINE(google-build-using-namespace)
|
||||
using namespace memgraph::storage;
|
||||
|
||||
/* Tests that serialization of vertices and edges to RocksDB works correctly.
|
||||
*/
|
||||
class RocksDBStorageTest : public ::testing::TestWithParam<bool> {
|
||||
@ -38,7 +41,7 @@ class RocksDBStorageTest : public ::testing::TestWithParam<bool> {
|
||||
|
||||
RocksDBStorageTest() {
|
||||
config_ = disk_test_utils::GenerateOnDiskConfig(testSuite);
|
||||
storage = std::make_unique<memgraph::storage::DiskStorage>(config_);
|
||||
storage = std::make_unique<DiskStorage>(config_);
|
||||
}
|
||||
|
||||
void TearDown() override {
|
||||
@ -49,8 +52,8 @@ class RocksDBStorageTest : public ::testing::TestWithParam<bool> {
|
||||
~RocksDBStorageTest() override {}
|
||||
|
||||
protected:
|
||||
std::unique_ptr<memgraph::storage::Storage> storage;
|
||||
memgraph::storage::Config config_;
|
||||
std::unique_ptr<Storage> storage;
|
||||
Config config_;
|
||||
};
|
||||
|
||||
TEST_F(RocksDBStorageTest, SerializeVertexGID) {
|
||||
@ -74,22 +77,21 @@ TEST_F(RocksDBStorageTest, SerializeVertexGIDLabels) {
|
||||
}
|
||||
|
||||
TEST_F(RocksDBStorageTest, SerializePropertiesLocalBuffer) {
|
||||
memgraph::storage::PropertyStore props;
|
||||
auto id = memgraph::storage::PropertyId::FromInt(0);
|
||||
auto id_value = memgraph::storage::PropertyValue(1);
|
||||
auto completion_percentage = memgraph::storage::PropertyId::FromInt(1);
|
||||
auto completion_percentage_value = memgraph::storage::PropertyValue(14);
|
||||
auto gender = memgraph::storage::PropertyId::FromInt(2);
|
||||
auto gender_value = memgraph::storage::PropertyValue("man");
|
||||
auto age = memgraph::storage::PropertyId::FromInt(3);
|
||||
auto age_value = memgraph::storage::PropertyValue(26);
|
||||
PropertyStore props;
|
||||
auto id = PropertyId::FromInt(0);
|
||||
auto id_value = PropertyValue(1);
|
||||
auto completion_percentage = PropertyId::FromInt(1);
|
||||
auto completion_percentage_value = PropertyValue(14);
|
||||
auto gender = PropertyId::FromInt(2);
|
||||
auto gender_value = PropertyValue("man");
|
||||
auto age = PropertyId::FromInt(3);
|
||||
auto age_value = PropertyValue(26);
|
||||
ASSERT_TRUE(props.SetProperty(id, id_value));
|
||||
ASSERT_TRUE(props.SetProperty(age, age_value));
|
||||
ASSERT_TRUE(props.SetProperty(completion_percentage, completion_percentage_value));
|
||||
ASSERT_TRUE(props.SetProperty(gender, gender_value));
|
||||
std::string serialized_props = memgraph::utils::SerializeProperties(props);
|
||||
memgraph::storage::PropertyStore deserialized_props =
|
||||
memgraph::storage::PropertyStore::CreateFromBuffer(serialized_props);
|
||||
PropertyStore deserialized_props = PropertyStore::CreateFromBuffer(serialized_props);
|
||||
|
||||
for (const auto &[prop_id, prop_value] : props.Properties()) {
|
||||
ASSERT_TRUE(deserialized_props.IsPropertyEqual(prop_id, prop_value));
|
||||
@ -97,25 +99,189 @@ TEST_F(RocksDBStorageTest, SerializePropertiesLocalBuffer) {
|
||||
}
|
||||
|
||||
// Serializes a property store holding a long string value and checks that a store rebuilt from the
// serialized buffer compares equal, property by property.
TEST_F(RocksDBStorageTest, SerializePropertiesExternalBuffer) {
  PropertyStore props;
  auto id = PropertyId::FromInt(0);
  auto id_value = PropertyValue(1);
  auto completion_percentage = PropertyId::FromInt(1);
  auto completion_percentage_value = PropertyValue(14);
  auto gender = PropertyId::FromInt(2);
  // Use big value so that memory for properties is allocated on the heap not on the stack
  auto gender_value = PropertyValue("man167863816386826");
  auto age = PropertyId::FromInt(3);
  auto age_value = PropertyValue(26);
  ASSERT_TRUE(props.SetProperty(id, id_value));
  ASSERT_TRUE(props.SetProperty(age, age_value));
  ASSERT_TRUE(props.SetProperty(completion_percentage, completion_percentage_value));
  ASSERT_TRUE(props.SetProperty(gender, gender_value));
  std::string serialized_props = memgraph::utils::SerializeProperties(props);
  PropertyStore deserialized_props = PropertyStore::CreateFromBuffer(serialized_props);

  for (const auto &[prop_id, prop_value] : props.Properties()) {
    ASSERT_TRUE(deserialized_props.IsPropertyEqual(prop_id, prop_value));
  }
}
|
||||
|
||||
// ExtractGidFromKey must recover gid "1" from a serialized vertex that carries no labels.
TEST(RocksDbSerDeSuite, ExtractVertexGidFromVertexKeyNoLabels) {
  Vertex vertex(Gid::FromInt(1), nullptr);
  const std::string vertexKey = memgraph::utils::SerializeVertex(vertex);

  ASSERT_EQ(memgraph::utils::ExtractGidFromKey(vertexKey), "1");
}
|
||||
|
||||
// ExtractGidFromKey must recover gid "1" from a serialized vertex that carries a single label.
TEST(RocksDbSerDeSuite, ExtractVertexGidFromVertexKeyWithOneLabel) {
  Vertex vertex(Gid::FromInt(1), nullptr);
  vertex.labels.push_back(LabelId::FromInt(2));
  const std::string vertexKey = memgraph::utils::SerializeVertex(vertex);

  ASSERT_EQ(memgraph::utils::ExtractGidFromKey(vertexKey), "1");
}
|
||||
|
||||
// ExtractGidFromKey must recover gid "1" from a serialized vertex that carries several labels.
TEST(RocksDbSerDeSuite, ExtractVertexGidFromVertexKeyWithMultipleLabels) {
  Vertex vertex(Gid::FromInt(1), nullptr);
  const std::vector<unsigned> labelIds = {2, 3, 4};
  for (const unsigned labelId : labelIds) {
    vertex.labels.push_back(LabelId::FromInt(labelId));
  }
  const std::string vertexKey = memgraph::utils::SerializeVertex(vertex);

  ASSERT_EQ(memgraph::utils::ExtractGidFromKey(vertexKey), "1");
}
|
||||
|
||||
// A vertex serialized with the single label 2 must yield exactly {"2"} from ExtractLabelsFromMainDiskStorage.
TEST(RocksDbSerDeSuite, ExtractLabelsFromMainDiskStorageWhenOnlyOneLabel) {
  Vertex vertex(Gid::FromInt(1), nullptr);
  const std::vector<unsigned> labelIds = {2};
  for (const unsigned labelId : labelIds) {
    vertex.labels.push_back(LabelId::FromInt(labelId));
  }
  const std::vector<std::string> expectedLabelsStr = {"2"};
  const std::string vertexKey = memgraph::utils::SerializeVertex(vertex);

  ASSERT_EQ(memgraph::utils::ExtractLabelsFromMainDiskStorage(vertexKey), expectedLabelsStr);
}
|
||||
|
||||
// A vertex serialized with labels 2, 3 and 4 must yield {"2", "3", "4"}, in that order, from
// ExtractLabelsFromMainDiskStorage.
TEST(RocksDbSerDeSuite, ExtractLabelsFromMainDiskStorageWhenMultipleLabels) {
  Vertex vertex(Gid::FromInt(1), nullptr);
  const std::vector<unsigned> labelIds = {2, 3, 4};
  for (const unsigned labelId : labelIds) {
    vertex.labels.push_back(LabelId::FromInt(labelId));
  }
  const std::vector<std::string> expectedLabelsStr = {"2", "3", "4"};
  const std::string vertexKey = memgraph::utils::SerializeVertex(vertex);

  ASSERT_EQ(memgraph::utils::ExtractLabelsFromMainDiskStorage(vertexKey), expectedLabelsStr);
}
|
||||
|
||||
// ExtractGidFromMainDiskStorage must recover gid "1" from a serialized vertex that carries no labels.
TEST(RocksDbSerDeSuite, ExtractVertexGidFromMainDiskStorageNoLabels) {
  Vertex vertex(Gid::FromInt(1), nullptr);
  const std::string vertexKey = memgraph::utils::SerializeVertex(vertex);

  ASSERT_EQ(memgraph::utils::ExtractGidFromMainDiskStorage(vertexKey), "1");
}
|
||||
|
||||
// ExtractGidFromMainDiskStorage must recover gid "1" from a serialized vertex that carries a single label.
TEST(RocksDbSerDeSuite, ExtractVertexGidFromMainDiskStorageWithOneLabel) {
  Vertex vertex(Gid::FromInt(1), nullptr);
  vertex.labels.push_back(LabelId::FromInt(2));
  const std::string vertexKey = memgraph::utils::SerializeVertex(vertex);

  ASSERT_EQ(memgraph::utils::ExtractGidFromMainDiskStorage(vertexKey), "1");
}
|
||||
|
||||
// ExtractGidFromMainDiskStorage must recover gid "1" from a serialized vertex that carries several labels.
TEST(RocksDbSerDeSuite, ExtractVertexGidFromMainDiskStorageWithMultipleLabels) {
  auto gid = Gid::FromInt(1);
  Vertex vertex(gid, nullptr);
  vertex.labels.push_back(LabelId::FromInt(2));
  vertex.labels.push_back(LabelId::FromInt(3));
  vertex.labels.push_back(LabelId::FromInt(4));
  std::string serializedVertex = memgraph::utils::SerializeVertex(vertex);

  // Exercise the main-disk-storage extractor named by this test (the label-index extractor has its own test).
  ASSERT_EQ(memgraph::utils::ExtractGidFromMainDiskStorage(serializedVertex), "1");
}
|
||||
|
||||
// ExtractGidFromLabelIndexStorage must recover gid "1" from a label-index key built for label 2.
TEST(RocksDbSerDeSuite, ExtractGidFromLabelIndexStorageKey) {
  const auto vertexGid = Gid::FromInt(1);
  const LabelId indexedLabel = LabelId::FromInt(2);
  const std::string indexKey = memgraph::utils::SerializeVertexAsKeyForLabelIndex(indexedLabel, vertexGid);

  ASSERT_EQ(memgraph::utils::ExtractGidFromLabelIndexStorage(indexKey), "1");
}
|
||||
|
||||
// Properties deserialized from a label-index value must have the same string buffer as the store they
// were serialized from.
TEST(RocksDbSerDeSuite, DeserializePropertiesFromLabelIndexStorage) {
  LabelId indexingLabel = LabelId::FromInt(2);
  std::vector<LabelId> labels = {LabelId::FromInt(3), LabelId::FromInt(4)};
  std::map<PropertyId, PropertyValue> properties = {{PropertyId::FromInt(5), PropertyValue("5")},
                                                    {PropertyId::FromInt(6), PropertyValue("6")}};
  PropertyStore propertyStore;
  propertyStore.InitProperties(properties);
  std::string serializedVertex =
      memgraph::utils::SerializeVertexAsValueForLabelIndex(indexingLabel, labels, propertyStore);

  ASSERT_EQ(memgraph::utils::DeserializePropertiesFromLabelIndexStorage(serializedVertex).StringBuffer(),
            propertyStore.StringBuffer());
}
|
||||
|
||||
// Properties deserialized from a label-property-index value must have the same string buffer as the
// store they were serialized from.
TEST(RocksDbSerDeSuite, DeserializePropertiesFromLabelPropertyIndexStorage) {
  LabelId indexingLabel = LabelId::FromInt(2);
  std::vector<LabelId> labels = {LabelId::FromInt(3), LabelId::FromInt(4)};
  std::map<PropertyId, PropertyValue> properties = {{PropertyId::FromInt(5), PropertyValue("5")},
                                                    {PropertyId::FromInt(6), PropertyValue("6")}};
  PropertyStore propertyStore;
  propertyStore.InitProperties(properties);
  std::string serializedVertex =
      memgraph::utils::SerializeVertexAsValueForLabelPropertyIndex(indexingLabel, labels, propertyStore);

  ASSERT_EQ(memgraph::utils::DeserializePropertiesFromLabelPropertyIndexStorage(serializedVertex).StringBuffer(),
            propertyStore.StringBuffer());
}
|
||||
|
||||
// ExtractGidFromLabelPropertyIndexStorage must recover gid "1" from a label-property-index key built
// for label 2 and property 3.
TEST(RocksDbSerDeSuite, ExtractGidFromLabelPropertyIndexStorageKey) {
  const auto vertexGid = Gid::FromInt(1);
  const LabelId indexedLabel = LabelId::FromInt(2);
  const PropertyId indexedProperty = PropertyId::FromInt(3);
  const std::string indexKey =
      memgraph::utils::SerializeVertexAsKeyForLabelPropertyIndex(indexedLabel, indexedProperty, vertexGid);

  ASSERT_EQ(memgraph::utils::ExtractGidFromLabelPropertyIndexStorage(indexKey), "1");
}
|
||||
|
||||
// The gid passed into a unique-constraint key must come back unchanged from
// ExtractGidFromUniqueConstraintStorage.
TEST(RocksDbSerDeSuite, ExtractGidFromUniqueConstraintStorageKey) {
  const std::string gidText = "1";
  const LabelId label = LabelId::FromInt(2);
  const std::set<PropertyId> constrainedProperties = {PropertyId::FromInt(3), PropertyId::FromInt(4)};
  const std::string constraintKey =
      memgraph::utils::SerializeVertexAsKeyForUniqueConstraint(label, constrainedProperties, gidText);

  ASSERT_EQ(memgraph::utils::ExtractGidFromUniqueConstraintStorage(constraintKey), gidText);
}
|
||||
|
||||
// The label passed into a unique-constraint key must come back unchanged from
// DeserializeConstraintLabelFromUniqueConstraintStorage.
TEST(RocksDbSerDeSuite, DeserializeConstraintLabelFromUniqueConstraintStorage) {
  const std::string gidText = "1";
  const LabelId label = LabelId::FromInt(2);
  const std::set<PropertyId> constrainedProperties = {PropertyId::FromInt(3), PropertyId::FromInt(4)};
  const std::string constraintKey =
      memgraph::utils::SerializeVertexAsKeyForUniqueConstraint(label, constrainedProperties, gidText);

  ASSERT_EQ(memgraph::utils::DeserializeConstraintLabelFromUniqueConstraintStorage(constraintKey), label);
}
|
||||
|
||||
// Properties deserialized from a unique-constraint value must have the same string buffer as the store
// they were serialized from.
TEST(RocksDbSerDeSuite, DeserializePropertiesFromUniqueConstraintStorage) {
  LabelId constraintLabel = LabelId::FromInt(2);
  std::vector<LabelId> labels = {LabelId::FromInt(3), LabelId::FromInt(4)};
  std::map<PropertyId, PropertyValue> properties = {{PropertyId::FromInt(5), PropertyValue("5")},
                                                    {PropertyId::FromInt(6), PropertyValue("6")}};
  PropertyStore propertyStore;
  propertyStore.InitProperties(properties);
  std::string serializedVertex =
      memgraph::utils::SerializeVertexAsValueForUniqueConstraint(constraintLabel, labels, propertyStore);

  ASSERT_EQ(memgraph::utils::DeserializePropertiesFromUniqueConstraintStorage(serializedVertex).StringBuffer(),
            propertyStore.StringBuffer());
}
|
||||
|
Loading…
Reference in New Issue
Block a user