Improve deserialization performance

* Change std::stoull to std::from_chars
---------

Co-authored-by: Aidar Samerkhanov <aidar.samerkhanov@memgraph.io>
This commit is contained in:
Andi 2023-09-20 14:25:17 +02:00 committed by GitHub
parent bce48361ca
commit 1553fcb958
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
11 changed files with 65 additions and 33 deletions

View File

@ -51,7 +51,7 @@ std::vector<std::pair<LabelId, PropertyId>> ExistenceConstraints::ListConstraint
void ExistenceConstraints::LoadExistenceConstraints(const std::vector<std::string> &keys) {
for (const auto &key : keys) {
const std::vector<std::string> parts = utils::Split(key, ",");
constraints_.emplace_back(LabelId::FromUint(std::stoull(parts[0])), PropertyId::FromUint(std::stoull(parts[1])));
constraints_.emplace_back(LabelId::FromString(parts[0]), PropertyId::FromString(parts[1]));
}
}

View File

@ -208,7 +208,7 @@ uint64_t DiskLabelIndex::ApproximateVertexCount(LabelId /*label*/) const { retur
void DiskLabelIndex::LoadIndexInfo(const std::vector<std::string> &labels) {
for (const std::string &label : labels) {
LabelId label_id = LabelId::FromUint(std::stoull(label));
LabelId label_id = LabelId::FromString(label);
index_.insert(label_id);
}
}

View File

@ -216,8 +216,8 @@ uint64_t DiskLabelPropertyIndex::ApproximateVertexCount(
void DiskLabelPropertyIndex::LoadIndexInfo(const std::vector<std::string> &keys) {
for (const auto &label_property : keys) {
std::vector<std::string> label_property_split = utils::Split(label_property, ",");
index_.emplace(std::make_pair(LabelId::FromUint(std::stoull(label_property_split[0])),
PropertyId::FromUint(std::stoull(label_property_split[1]))));
index_.emplace(
std::make_pair(LabelId::FromString(label_property_split[0]), PropertyId::FromString(label_property_split[1])));
}
}

View File

@ -12,6 +12,7 @@
#pragma once
#include <atomic>
#include <charconv>
#include <cstddef>
#include <filesystem>
#include <memory>
@ -43,7 +44,8 @@ class DiskNameIdMapper final : public NameIdMapper {
}
uint64_t res_id = 0;
if (auto maybe_id_from_disk = name_to_id_storage_->Get(std::string(name)); maybe_id_from_disk.has_value()) {
res_id = std::stoull(maybe_id_from_disk.value());
auto id_disk_value = maybe_id_from_disk.value();
res_id = utils::ParseStringToUint64(id_disk_value);
InsertNameIdEntryToCache(std::string(name), res_id);
InsertIdNameEntryToCache(res_id, std::string(name));
} else {
@ -92,14 +94,14 @@ class DiskNameIdMapper final : public NameIdMapper {
void InitializeFromDisk() {
for (auto itr = name_to_id_storage_->begin(); itr != name_to_id_storage_->end(); ++itr) {
auto name = itr->first;
auto id = std::stoull(itr->second);
std::string name = itr->first;
uint64_t id = utils::ParseStringToUint64(itr->second);
InsertNameIdEntryToCache(name, id);
counter_.fetch_add(1, std::memory_order_release);
}
for (auto itr = id_to_name_storage_->begin(); itr != id_to_name_storage_->end(); ++itr) {
auto id = std::stoull(itr->first);
auto name = itr->second;
uint64_t id = utils::ParseStringToUint64(itr->first);
std::string name = itr->second;
InsertIdNameEntryToCache(id, name);
}
}

View File

@ -10,6 +10,7 @@
// licenses/APL.txt.
#include <atomic>
#include <charconv>
#include <cstdint>
#include <limits>
#include <optional>
@ -203,7 +204,8 @@ void DiskStorage::LoadTimestampIfExists() {
return;
}
if (auto last_timestamp_ = durability_kvstore_->Get(lastTransactionStartTimeStamp); last_timestamp_.has_value()) {
timestamp_ = std::stoull(last_timestamp_.value());
auto last_timestamp_value = last_timestamp_.value();
std::from_chars(last_timestamp_value.data(), last_timestamp_value.data() + last_timestamp_value.size(), timestamp_);
}
}
@ -348,7 +350,7 @@ std::optional<storage::VertexAccessor> DiskStorage::DiskAccessor::LoadVertexToMa
std::string &&ts) {
auto main_storage_accessor = vertices_.access();
storage::Gid gid = Gid::FromUint(std::stoull(utils::ExtractGidFromKey(key)));
storage::Gid gid = Gid::FromString(utils::ExtractGidFromKey(key));
if (ObjectExistsInCache(main_storage_accessor, gid)) {
return std::nullopt;
}
@ -361,7 +363,7 @@ std::optional<storage::VertexAccessor> DiskStorage::DiskAccessor::LoadVertexToMa
std::optional<storage::VertexAccessor> DiskStorage::DiskAccessor::LoadVertexToLabelIndexCache(
std::string &&key, std::string &&value, Delta *index_delta,
utils::SkipList<storage::Vertex>::Accessor index_accessor) {
storage::Gid gid = Gid::FromUint(std::stoull(utils::ExtractGidFromLabelIndexStorage(key)));
storage::Gid gid = Gid::FromString(utils::ExtractGidFromLabelIndexStorage(key));
if (ObjectExistsInCache(index_accessor, gid)) {
return std::nullopt;
}
@ -374,7 +376,7 @@ std::optional<storage::VertexAccessor> DiskStorage::DiskAccessor::LoadVertexToLa
std::optional<storage::VertexAccessor> DiskStorage::DiskAccessor::LoadVertexToLabelPropertyIndexCache(
std::string &&key, std::string &&value, Delta *index_delta,
utils::SkipList<storage::Vertex>::Accessor index_accessor) {
storage::Gid gid = Gid::FromUint(std::stoull(utils::ExtractGidFromLabelPropertyIndexStorage(key)));
storage::Gid gid = Gid::FromString(utils::ExtractGidFromLabelPropertyIndexStorage(key));
if (ObjectExistsInCache(index_accessor, gid)) {
return std::nullopt;
}
@ -387,7 +389,7 @@ std::optional<EdgeAccessor> DiskStorage::DiskAccessor::DeserializeEdge(const roc
const rocksdb::Slice &value,
const rocksdb::Slice &ts) {
const auto edge_parts = utils::Split(key.ToStringView(), "|");
const Gid edge_gid = Gid::FromUint(std::stoull(edge_parts[4]));
const Gid edge_gid = Gid::FromString(edge_parts[4]);
auto edge_acc = edges_.access();
auto res = edge_acc.find(edge_gid);
@ -405,12 +407,12 @@ std::optional<EdgeAccessor> DiskStorage::DiskAccessor::DeserializeEdge(const roc
},
edge_parts);
const auto from_acc = FindVertex(Gid::FromUint(std::stoull(from_gid)), View::OLD);
const auto to_acc = FindVertex(Gid::FromUint(std::stoull(to_gid)), View::OLD);
const auto from_acc = FindVertex(Gid::FromString(from_gid), View::OLD);
const auto to_acc = FindVertex(Gid::FromString(to_gid), View::OLD);
if (!from_acc || !to_acc) {
throw utils::BasicException("Non-existing vertices found during edge deserialization");
}
const auto edge_type_id = storage::EdgeTypeId::FromUint(std::stoull(edge_parts[3]));
const auto edge_type_id = storage::EdgeTypeId::FromString(edge_parts[3]);
auto maybe_edge = CreateEdgeFromDisk(&*from_acc, &*to_acc, edge_type_id, edge_gid, value.ToStringView(),
key.ToString(), ts.ToString());
MG_ASSERT(maybe_edge.HasValue());
@ -450,7 +452,7 @@ void DiskStorage::DiskAccessor::LoadVerticesFromMainStorageToEdgeImportCache() {
for (it->SeekToFirst(); it->Valid(); it->Next()) {
std::string key = it->key().ToString();
std::string value = it->value().ToString();
storage::Gid gid = Gid::FromUint(std::stoull(utils::ExtractGidFromMainDiskStorage(key)));
storage::Gid gid = Gid::FromString(utils::ExtractGidFromMainDiskStorage(key));
if (ObjectExistsInCache(cache_accessor, gid)) continue;
std::vector<LabelId> labels_id{utils::DeserializeLabelsFromMainDiskStorage(key)};
@ -485,7 +487,7 @@ void DiskStorage::DiskAccessor::LoadVerticesFromLabelIndexStorageToEdgeImportCac
std::string key = it->key().ToString();
std::string value = it->value().ToString();
if (key.starts_with(label_prefix)) {
storage::Gid gid = Gid::FromUint(std::stoull(utils::ExtractGidFromLabelIndexStorage(key)));
storage::Gid gid = Gid::FromString(utils::ExtractGidFromLabelIndexStorage(key));
if (ObjectExistsInCache(cache_accessor, gid)) continue;
std::vector<LabelId> labels_id{utils::DeserializeLabelsFromLabelIndexStorage(key, value)};
@ -539,7 +541,7 @@ void DiskStorage::DiskAccessor::LoadVerticesFromLabelPropertyIndexStorageToEdgeI
std::string key = it->key().ToString();
std::string value = it->value().ToString();
if (key.starts_with(label_property_prefix)) {
storage::Gid gid = Gid::FromUint(std::stoull(utils::ExtractGidFromLabelPropertyIndexStorage(key)));
storage::Gid gid = Gid::FromString(utils::ExtractGidFromLabelPropertyIndexStorage(key));
if (ObjectExistsInCache(cache_accessor, gid)) continue;
std::vector<LabelId> labels_id{utils::DeserializeLabelsFromLabelPropertyIndexStorage(key, value)};
@ -726,7 +728,7 @@ void DiskStorage::DiskAccessor::LoadVerticesFromDiskLabelIndex(LabelId label,
const auto serialized_label = utils::SerializeIdType(label);
for (index_it->SeekToFirst(); index_it->Valid(); index_it->Next()) {
std::string key = index_it->key().ToString();
Gid curr_gid = Gid::FromUint(std::stoull(utils::ExtractGidFromLabelIndexStorage(key)));
Gid curr_gid = Gid::FromString(utils::ExtractGidFromLabelIndexStorage(key));
spdlog::trace("Loaded vertex with key: {} from label index storage", key);
if (key.starts_with(serialized_label) && !utils::Contains(gids, curr_gid)) {
// We should pass it->timestamp().ToString() instead of "0"
@ -780,7 +782,7 @@ void DiskStorage::DiskAccessor::LoadVerticesFromDiskLabelPropertyIndex(LabelId l
const auto label_property_prefix = utils::SerializeIdType(label) + "|" + utils::SerializeIdType(property);
for (index_it->SeekToFirst(); index_it->Valid(); index_it->Next()) {
std::string key = index_it->key().ToString();
Gid curr_gid = Gid::FromUint(std::stoull(utils::ExtractGidFromLabelPropertyIndexStorage(key)));
Gid curr_gid = Gid::FromString(utils::ExtractGidFromLabelPropertyIndexStorage(key));
/// TODO: optimize
if (label_property_filter(key, label_property_prefix, gids, curr_gid)) {
// We should pass it->timestamp().ToString() instead of "0"
@ -810,7 +812,7 @@ void DiskStorage::DiskAccessor::LoadVerticesFromDiskLabelPropertyIndexWithPointV
for (index_it->SeekToFirst(); index_it->Valid(); index_it->Next()) {
std::string key = index_it->key().ToString();
std::string it_value = index_it->value().ToString();
Gid curr_gid = Gid::FromUint(std::stoull(utils::ExtractGidFromLabelPropertyIndexStorage(key)));
Gid curr_gid = Gid::FromString(utils::ExtractGidFromLabelPropertyIndexStorage(key));
/// TODO: optimize
PropertyStore properties = utils::DeserializePropertiesFromLabelPropertyIndexStorage(it_value);
if (key.starts_with(label_property_prefix) && !utils::Contains(gids, curr_gid) &&
@ -869,7 +871,7 @@ void DiskStorage::DiskAccessor::LoadVerticesFromDiskLabelPropertyIndexForInterva
for (index_it->SeekToFirst(); index_it->Valid(); index_it->Next()) {
std::string key_str = index_it->key().ToString();
std::string it_value_str = index_it->value().ToString();
Gid curr_gid = Gid::FromUint(std::stoull(utils::ExtractGidFromLabelPropertyIndexStorage(key_str)));
Gid curr_gid = Gid::FromString(utils::ExtractGidFromLabelPropertyIndexStorage(key_str));
/// TODO: andi this will be optimized
/// TODO: couple this condition
PropertyStore properties = utils::DeserializePropertiesFromLabelPropertyIndexStorage(it_value_str);
@ -1076,7 +1078,7 @@ std::optional<VertexAccessor> DiskStorage::DiskAccessor::FindVertex(storage::Gid
disk_transaction_->GetIterator(read_opts, disk_storage->kvstore_->vertex_chandle));
for (it->SeekToFirst(); it->Valid(); it->Next()) {
std::string key = it->key().ToString();
if (Gid::FromUint(std::stoull(utils::ExtractGidFromKey(key))) == gid) {
if (Gid::FromString(utils::ExtractGidFromKey(key)) == gid) {
// We should pass it->timestamp().ToString() instead of "0"
// This is hack until RocksDB will support timestamp() in WBWI iterator
return LoadVertexToMainMemoryCache(std::move(key), it->value().ToString(), deserializeTimestamp);

View File

@ -340,10 +340,10 @@ RocksDBStorage *DiskUniqueConstraints::GetRocksDBStorage() const { return kvstor
void DiskUniqueConstraints::LoadUniqueConstraints(const std::vector<std::string> &keys) {
for (const auto &key : keys) {
std::vector<std::string> key_parts = utils::Split(key, ",");
LabelId label = LabelId::FromUint(std::stoull(key_parts[0]));
LabelId label = LabelId::FromString(key_parts[0]);
std::set<PropertyId> properties;
for (int i = 1; i < key_parts.size(); i++) {
properties.insert(PropertyId::FromUint(std::stoull(key_parts[i])));
properties.insert(PropertyId::FromString(key_parts[i]));
}
constraints_.emplace(std::make_pair(label, properties));
}

View File

@ -1,4 +1,4 @@
// Copyright 2022 Memgraph Ltd.
// Copyright 2023 Memgraph Ltd.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
@ -11,10 +11,14 @@
#pragma once
#include <charconv>
#include <functional>
#include <system_error>
#include <type_traits>
#include <utils/exceptions.hpp>
#include "utils/cast.hpp"
#include "utils/string.hpp"
namespace memgraph::storage {
@ -31,6 +35,7 @@ namespace memgraph::storage {
static name FromInt(int64_t id) { return name{utils::MemcpyCast<uint64_t>(id)}; } \
uint64_t AsUint() const { return id_; } \
int64_t AsInt() const { return utils::MemcpyCast<int64_t>(id_); } \
static name FromString(std::string_view id) { return name{utils::ParseStringToUint64(id)}; } \
\
private: \
uint64_t id_; \

View File

@ -19,6 +19,7 @@
#include "storage/v2/transaction.hpp"
#include "storage/v2/view.hpp"
#include "utils/rocksdb_serialization.hpp"
#include "utils/string.hpp"
namespace memgraph::storage {
@ -131,13 +132,15 @@ inline Delta *CreateDeleteDeserializedObjectDelta(Transaction *transaction, std:
std::string &&ts) {
transaction->EnsureCommitTimestampExists();
// Should use utils::DecodeFixed64(ts.c_str()) once we will move to RocksDB real timestamps
return &transaction->deltas.use().emplace_back(Delta::DeleteDeserializedObjectTag(), std::stoull(ts), old_disk_key);
uint64_t ts_id = utils::ParseStringToUint64(ts);
return &transaction->deltas.use().emplace_back(Delta::DeleteDeserializedObjectTag(), ts_id, old_disk_key);
}
inline Delta *CreateDeleteDeserializedObjectDelta(std::list<Delta> *deltas, std::optional<std::string> old_disk_key,
std::string &&ts) {
// Should use utils::DecodeFixed64(ts.c_str()) once we will move to RocksDB real timestamps
return &deltas->emplace_back(Delta::DeleteDeserializedObjectTag(), std::stoull(ts), old_disk_key);
uint64_t ts_id = utils::ParseStringToUint64(ts);
return &deltas->emplace_back(Delta::DeleteDeserializedObjectTag(), ts_id, old_disk_key);
}
inline Delta *CreateDeleteDeserializedIndexObjectDelta(std::list<Delta> &deltas,
@ -149,7 +152,8 @@ inline Delta *CreateDeleteDeserializedIndexObjectDelta(std::list<Delta> &deltas,
inline Delta *CreateDeleteDeserializedIndexObjectDelta(std::list<Delta> &deltas,
std::optional<std::string> old_disk_key, const std::string &ts) {
// Should use utils::DecodeFixed64(ts.c_str()) once we will move to RocksDB real timestamps
return CreateDeleteDeserializedIndexObjectDelta(deltas, old_disk_key, std::stoull(ts));
uint64_t ts_id = utils::ParseStringToUint64(ts);
return CreateDeleteDeserializedIndexObjectDelta(deltas, old_disk_key, ts_id);
}
/// This function creates a delta in the transaction for the object and links

View File

@ -148,4 +148,14 @@ class NotYetImplemented final : public BasicException {
: NotYetImplemented(fmt::format(fmt, std::forward<Args>(args)...)) {}
};
/**
 * Exception thrown when a string cannot be parsed into the expected value.
 *
 * NOTE(review): both constructors are declared `noexcept` although they build
 * `std::string` objects (and the second one calls `fmt::format`); if either
 * of those throws, the program terminates instead of propagating the error.
 * Confirm this matches the convention of the other exception classes here.
 */
class ParseException final : public BasicException {
public:
/**
 * @param what the text that failed to parse; it is appended to the fixed
 *             prefix "Parsing failed for string: " to form the message.
 */
explicit ParseException(const std::string_view what) noexcept
: BasicException("Parsing failed for string: " + std::string(what)) {}
/**
 * Formats `fmt` with `args` via `fmt::format` and delegates the resulting
 * text to another ParseException constructor (presumably the `string_view`
 * overload above, in which case the formatted text also receives the
 * "Parsing failed for string: " prefix -- verify overload resolution).
 */
template <class... Args>
explicit ParseException(fmt::format_string<Args...> fmt, Args &&...args) noexcept
: ParseException(fmt::format(fmt, std::forward<Args>(args)...)) {}
};
} // namespace memgraph::utils

View File

@ -97,7 +97,7 @@ inline std::vector<storage::LabelId> TransformFromStringLabels(std::vector<std::
std::vector<storage::LabelId> transformed_labels;
transformed_labels.reserve(labels.size());
for (const std::string &label : labels) {
transformed_labels.emplace_back(storage::LabelId::FromUint(std::stoull(label)));
transformed_labels.emplace_back(storage::LabelId::FromString(label));
}
return transformed_labels;
}
@ -242,7 +242,7 @@ inline std::vector<storage::LabelId> DeserializeLabelsFromIndexStorage(const std
std::string labels_str{GetViewOfFirstPartOfSplit(value, '|')};
std::vector<storage::LabelId> labels{TransformFromStringLabels(utils::Split(labels_str, ","))};
std::string indexing_label = key.substr(0, key.find('|'));
labels.emplace_back(storage::LabelId::FromUint(std::stoull(indexing_label)));
labels.emplace_back(storage::LabelId::FromString(indexing_label));
return labels;
}

View File

@ -14,6 +14,8 @@
#include <algorithm>
#include <cctype>
#include <charconv>
#include <cstdint>
#include <cstring>
#include <iostream>
#include <iterator>
@ -328,6 +330,13 @@ inline int64_t ParseInt(const std::string_view s) {
return t;
}
/**
 * Parse an unsigned 64-bit decimal integer from a string.
 *
 * The conversion is done with `std::from_chars`, so it is locale-independent
 * and does not allocate. Unlike `std::stoull`, leading whitespace and a
 * leading '+' or '-' sign are not accepted.
 *
 * The whole input must be a number: `std::from_chars` stops at the first
 * character that is not a digit and still reports success, so the end
 * pointer is checked as well. Without that check an input such as "12abc"
 * would silently be parsed as 12.
 *
 * @param s the characters to parse.
 * @return the parsed value.
 * @throw utils::ParseException if `s` is empty, does not start with a digit,
 *        contains trailing characters after the digits, or the value does
 *        not fit into `uint64_t`.
 */
inline uint64_t ParseStringToUint64(const std::string_view s) {
const char *const first = s.data();
const char *const last = first + s.size();
uint64_t value = 0;
const auto [ptr, ec] = std::from_chars(first, last, value);
// `ec` covers "no digits" and "out of range"; `ptr != last` covers partial parses.
if (ec != std::errc{} || ptr != last) {
throw utils::ParseException(s);
}
return value;
}
/**
* Parse a double floating point value from a string using classic locale.
* Note, the current implementation copies the given string which may perform a