memgraph/src/slk/streams.hpp
2022-03-14 15:47:41 +01:00

122 lines
4.4 KiB
C++

// Copyright 2022 Memgraph Ltd.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
// License, and you may not use this file except in compliance with the Business Source License.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
#pragma once
#include <cstdint>
#include <functional>
#include <limits>
#include "utils/exceptions.hpp"
namespace memgraph::slk {
using SegmentSize = uint32_t;
// The maximum allowed size of a segment is set to `kSegmentMaxDataSize`. The
// value of 256 KiB was chosen so that the segment buffer will always fit on the
// stack (it mustn't be too large) and that it isn't too small so that most SLK
// messages fit into a single segment.
const uint64_t kSegmentMaxDataSize = 262144;
const uint64_t kSegmentMaxTotalSize = kSegmentMaxDataSize + sizeof(SegmentSize) + sizeof(SegmentSize);
static_assert(kSegmentMaxDataSize <= std::numeric_limits<SegmentSize>::max(),
"The SLK segment can't be larger than the type used to store its size!");
/// SLK splits binary data into segments. Segments are used to avoid the need to
/// have all of the encoded data in memory at once during the building process.
/// That enables streaming during the building process and makes the whole
/// process make zero memory allocations because only one static buffer is used.
/// During the reading process you must have all of the data in memory.
///
/// SLK segments are just chunks of binary data. They start with a `size` field
/// and then are followed by the binary data itself. The segments have a maximum
/// size of `kSegmentMaxDataSize`. The `size` field itself has a size of
/// `sizeof(SegmentSize)`. A segment of size 0 indicates that we have reached
/// the end of a stream and that there is no more data to be read/written.
/// Builder used to create a SLK segment stream.
class Builder {
public:
Builder(std::function<void(const uint8_t *, size_t, bool)> write_func);
/// Function used internally by SLK to serialize the data.
void Save(const uint8_t *data, uint64_t size);
/// Function that should be called after all `slk::Save` operations are done.
void Finalize();
private:
void FlushSegment(bool final_segment);
std::function<void(const uint8_t *, size_t, bool)> write_func_;
size_t pos_{0};
uint8_t segment_[kSegmentMaxTotalSize];
};
/// Exception that will be thrown if segments can't be decoded from the byte
/// stream.
class SlkReaderException : public utils::BasicException {
public:
using utils::BasicException::BasicException;
};
/// Reader used to read data from a SLK segment stream.
class Reader {
public:
Reader(const uint8_t *data, size_t size);
/// Function used internally by SLK to deserialize the data.
void Load(uint8_t *data, uint64_t size);
/// Function that should be called after all `slk::Load` operations are done.
void Finalize();
private:
void GetSegment(bool should_be_final = false);
const uint8_t *data_;
size_t size_;
size_t pos_{0};
size_t have_{0};
};
/// Stream status that is returned by the `CheckStreamComplete` function.
enum class StreamStatus {
PARTIAL,
COMPLETE,
INVALID,
};
/// Stream information retuned by the `CheckStreamComplete` function.
struct StreamInfo {
StreamStatus status;
size_t stream_size;
size_t encoded_data_size;
};
/// This function checks the binary stream to see whether it contains a fully
/// received SLK segment stream. The function returns a `StreamInfo` structure
/// that has three members. The `status` member indicates in which state is the
/// received data (partially received, completely received or invalid), the
/// `stream_size` member indicates the size of the data stream (see NOTE) and
/// the `encoded_data_size` member indicates the size of the SLK encoded data in
/// the stream (so far).
/// NOTE: If the stream is partial, the size of the data stream returned will
/// not be the exact size of the received data. It will be a maximum expected
/// size of the data stream. It is used to indicate to the network stack how
/// much data it should receive before it makes sense to retry decoding of the
/// segment data.
StreamInfo CheckStreamComplete(const uint8_t *data, size_t size);
} // namespace memgraph::slk