memgraph/src/query/stream/streams.hpp
János Benjamin Antal 8ea6b48879 Add unit tests
2022-01-21 15:51:51 +01:00

198 lines
7.0 KiB
C++

// Copyright 2022 Memgraph Ltd.
//
// Use of this software is governed by the Business Source License
// included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
// License, and you may not use this file except in compliance with the Business Source License.
//
// As of the Change Date specified in that file, in accordance with
// the Business Source License, use of this software will be governed
// by the Apache License, Version 2.0, included in the file
// licenses/APL.txt.
#pragma once
#include <concepts>
#include <functional>
#include <map>
#include <optional>
#include <type_traits>
#include <unordered_map>
#include <json/json.hpp>
#include "integrations/kafka/consumer.hpp"
#include "kvstore/kvstore.hpp"
#include "query/stream/common.hpp"
#include "query/stream/sources.hpp"
#include "query/typed_value.hpp"
#include "storage/v2/property_value.hpp"
#include "utils/event_counter.hpp"
#include "utils/exceptions.hpp"
#include "utils/rw_lock.hpp"
#include "utils/synchronized.hpp"
// Forward declaration (global namespace) of the type that query::stream::Streams declares as a friend;
// presumably the unit-test fixture for this class.
class StreamsTest;
namespace query {
// Forward declaration: this header only refers to InterpreterContext through a pointer.
struct InterpreterContext;
namespace stream {
/// Exception type used to report stream-management failures (e.g. a stream's metadata cannot be persisted).
class StreamsException : public utils::BasicException {
 public:
  // Re-use every constructor of the base exception as-is.
  using utils::BasicException::BasicException;
};
/// Type trait that maps a stream source type to the structure describing that stream.
/// The primary template is only declared; usable mappings are provided by the specializations that follow.
template <typename T>
struct StreamInfo;

/// The `void` specialization selects the source-independent CommonStreamInfo.
template <>
struct StreamInfo<void> {
  using Type = CommonStreamInfo;
};

/// Any type that satisfies the Stream concept is mapped to its own nested `StreamInfo` type.
template <Stream TStream>
struct StreamInfo<TStream> {
  using Type = typename TStream::StreamInfo;
};

/// Convenience alias for `typename StreamInfo<T>::Type`.
template <typename T>
using StreamInfoType = typename StreamInfo<T>::Type;
/// Describes the state of a single stream.
///
/// @tparam T the stream source type whose info structure is stored in `info`; with the default `void` the
///           source-independent info type selected by StreamInfoType<void> is used.
template <typename T = void>
struct StreamStatus {
  /// Name of the stream.
  std::string name;
  /// Kind of source behind the stream. Value-initialized so a default-constructed status never holds an
  /// indeterminate value.
  StreamSourceType type{};
  /// Whether the stream is running. Defaults to false so a default-constructed status is well defined.
  bool is_running{false};
  /// Source-specific (or common, for T = void) description of the stream.
  StreamInfoType<T> info;
  /// Optional owner associated with the stream.
  std::optional<std::string> owner;
};
/// Return type of Streams::Check: a vector of rows, each row being a vector of TypedValue.
/// Per the documentation of Streams::Check, each row holds two elements: the query string and the nullable
/// parameters map.
using TransformationResult = std::vector<std::vector<TypedValue>>;
/// Manages the stream sources (Kafka and Pulsar) together with their persisted metadata.
///
/// This class is responsible for all query supported actions to happen.
class Streams final {
  friend StreamsTest;

 public:
  /// Initializes the streams.
  ///
  /// @param interpreter_context context to use to run the result of transformations
  /// @param directory a directory path to store the persisted streams metadata
  Streams(InterpreterContext *interpreter_context, std::filesystem::path directory);

  /// Restores the streams from the persisted metadata.
  /// The restoration is done in a best effort manner, therefore no exception is thrown on failure, but the error is
  /// logged. If a stream was running previously, then after restoration it will be started.
  /// This function should only be called when there are no existing streams.
  void RestoreStreams();

  /// Creates a new import stream.
  /// The create implies connecting to the server to get metadata necessary to initialize the stream. This
  /// method assures there is no other stream with the same name.
  ///
  /// @tparam TStream the stream source type to create (must satisfy the Stream concept)
  /// @param stream_name the name of the stream which can be used to uniquely identify the stream
  /// @param info the necessary information needed to create the consumer and transform the messages
  /// @param owner optional owner associated with the stream
  ///
  /// @throws StreamsException if the stream with the same name exists or if the creation of the consumer fails
  template <Stream TStream>
  void Create(const std::string &stream_name, typename TStream::StreamInfo info, std::optional<std::string> owner);

  /// Deletes an existing stream and all the data that was persisted.
  ///
  /// @param stream_name name of the stream that needs to be deleted.
  ///
  /// @throws StreamsException if the stream doesn't exist or if the persisted metadata can't be deleted.
  void Drop(const std::string &stream_name);

  /// Start consuming from a stream.
  ///
  /// @param stream_name name of the stream that needs to be started
  ///
  /// @throws StreamsException if the stream doesn't exist or if the metadata cannot be persisted
  /// @throws ConsumerRunningException if the consumer is already running
  void Start(const std::string &stream_name);

  /// Stop consuming from a stream.
  ///
  /// @param stream_name name of the stream that needs to be stopped
  ///
  /// @throws StreamsException if the stream doesn't exist or if the metadata cannot be persisted
  /// @throws ConsumerStoppedException if the consumer is already stopped
  void Stop(const std::string &stream_name);

  /// Start consuming from all streams that are stopped.
  ///
  /// @throws StreamsException if the metadata cannot be persisted
  void StartAll();

  /// Stop consuming from all streams that are running.
  ///
  /// @throws StreamsException if the metadata cannot be persisted
  void StopAll();

  /// Return current status for all streams.
  /// It might happen that the is_running field is out of date if one of the streams stops during the invocation of
  /// this function because of an error.
  std::vector<StreamStatus<>> GetStreamInfo() const;

  /// Do a dry-run consume from a stream.
  ///
  /// @param stream_name name of the stream we want to test
  /// @param timeout optional time limit for the check; when std::nullopt the implementation chooses the limit
  ///                (NOTE(review): the default is not visible in this header - confirm in the implementation)
  /// @param batch_limit number of batches we want to test before stopping
  ///
  /// @returns A vector of vectors of TypedValue. Each subvector contains two elements, the query string and the
  /// nullable parameters map.
  ///
  /// @throws StreamsException if the stream doesn't exist
  /// @throws ConsumerRunningException if the consumer is already running
  /// @throws ConsumerCheckFailedException if the transformation function throws any std::exception during processing
  TransformationResult Check(const std::string &stream_name,
                             std::optional<std::chrono::milliseconds> timeout = std::nullopt,
                             std::optional<int64_t> batch_limit = std::nullopt) const;

  /// Return the configuration value passed to memgraph.
  std::string_view BootstrapServers() const;

 private:
  /// A stream source guarded by a write-prioritized read/write lock.
  template <Stream TStream>
  using SynchronizedStreamSource = utils::Synchronized<TStream, utils::WritePrioritizedRWLock>;

  /// Everything stored per stream: the transformation name, the optional owner and the synchronized source.
  template <Stream TStream>
  struct StreamData {
    std::string transformation_name;
    std::optional<std::string> owner;
    std::unique_ptr<SynchronizedStreamSource<TStream>> stream_source;
  };

  /// One alternative per supported stream source type.
  using StreamDataVariant = std::variant<StreamData<KafkaStream>, StreamData<PulsarStream>>;
  /// Streams keyed by their name.
  using StreamsMap = std::unordered_map<std::string, StreamDataVariant>;
  /// The streams map guarded by a write-prioritized read/write lock.
  using SynchronizedStreamsMap = utils::Synchronized<StreamsMap, utils::WritePrioritizedRWLock>;

  /// Defined out of line. Judging by the signature it creates the source for `stream_name` inside `map` and returns
  /// an iterator to the corresponding entry - NOTE(review): confirm against the implementation.
  template <Stream TStream>
  StreamsMap::iterator CreateConsumer(StreamsMap &map, const std::string &stream_name,
                                      typename TStream::StreamInfo stream_info, std::optional<std::string> owner);

  /// Serializes `status` to JSON and stores it in the key-value store under the stream's name.
  ///
  /// @param status the status to persist; it is consumed by this call
  ///
  /// @throws StreamsException if the key-value store reports that the write failed
  template <Stream TStream>
  void Persist(StreamStatus<TStream> &&status) {
    // Copy the name up front: `status` is moved from below, but the name is still needed as the storage key and for
    // the error message.
    const std::string stream_name = status.name;
    if (!storage_.Put(stream_name, nlohmann::json(std::move(status)).dump())) {
      throw StreamsException{"Couldn't persist stream data for stream '{}'", stream_name};
    }
  }

  // Defined out of line; presumably they register the stream related procedures (all / Kafka specific / Pulsar
  // specific) - confirm in the implementation.
  void RegisterProcedures();
  void RegisterKafkaProcedures();
  void RegisterPulsarProcedures();

  /// Non-owning pointer to the interpreter context.
  InterpreterContext *interpreter_context_;
  /// Key-value store holding the persisted stream metadata.
  kvstore::KVStore storage_;
  /// All known streams, guarded by a lock.
  SynchronizedStreamsMap streams_;
};
} // namespace stream
} // namespace query