2021-10-26 14:53:56 +08:00
|
|
|
# Copyright 2021 Memgraph Ltd.
|
|
|
|
#
|
|
|
|
# Use of this software is governed by the Business Source License
|
|
|
|
# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
|
|
|
|
# License, and you may not use this file except in compliance with the Business Source License.
|
|
|
|
#
|
|
|
|
# As of the Change Date specified in that file, in accordance with
|
|
|
|
# the Business Source License, use of this software will be governed
|
|
|
|
# by the Apache License, Version 2.0, included in the file
|
|
|
|
# licenses/APL.txt.
|
|
|
|
|
2021-07-22 22:22:08 +08:00
|
|
|
import time
|
2023-12-21 03:03:06 +08:00
|
|
|
from multiprocessing import Manager, Process, Value
|
2021-07-22 22:22:08 +08:00
|
|
|
|
2023-12-21 03:03:06 +08:00
|
|
|
import mgclient
|
|
|
|
import pytest
|
2022-07-08 16:47:18 +08:00
|
|
|
from mg_utils import mg_sleep_and_assert
|
2021-11-15 23:23:49 +08:00
|
|
|
|
2021-07-22 22:22:08 +08:00
|
|
|
# These are the indices of the different values in the result of SHOW STREAM
|
|
|
|
# query
|
|
|
|
NAME = 0
|
2021-11-16 01:25:23 +08:00
|
|
|
TYPE = 1
|
|
|
|
BATCH_INTERVAL = 2
|
|
|
|
BATCH_SIZE = 3
|
|
|
|
TRANSFORM = 4
|
|
|
|
OWNER = 5
|
|
|
|
IS_RUNNING = 6
|
2021-07-22 22:22:08 +08:00
|
|
|
|
2021-11-15 23:23:49 +08:00
|
|
|
# These are the indices of the query and parameters in the result of CHECK
|
|
|
|
# STREAM query
|
2022-06-09 05:17:44 +08:00
|
|
|
QUERIES = 0
|
|
|
|
RAWMESSAGES = 1
|
|
|
|
PARAMETERS_LITERAL = "parameters"
|
|
|
|
QUERY_LITERAL = "query"
|
2021-11-15 23:23:49 +08:00
|
|
|
|
|
|
|
SIMPLE_MSG = b"message"
|
|
|
|
|
2021-07-22 22:22:08 +08:00
|
|
|
|
|
|
|
def execute_and_fetch_all(cursor, query):
|
|
|
|
cursor.execute(query)
|
|
|
|
return cursor.fetchall()
|
|
|
|
|
|
|
|
|
|
|
|
def connect(**kwargs):
|
|
|
|
connection = mgclient.connect(host="localhost", port=7687, **kwargs)
|
|
|
|
connection.autocommit = True
|
|
|
|
return connection
|
|
|
|
|
|
|
|
|
|
|
|
def timed_wait(fun):
|
|
|
|
start_time = time.time()
|
2022-06-09 05:17:44 +08:00
|
|
|
SECONDS = 10
|
2021-07-22 22:22:08 +08:00
|
|
|
|
|
|
|
while True:
|
|
|
|
current_time = time.time()
|
|
|
|
elapsed_time = current_time - start_time
|
|
|
|
|
2022-06-09 05:17:44 +08:00
|
|
|
if elapsed_time > SECONDS:
|
2021-07-22 22:22:08 +08:00
|
|
|
return False
|
|
|
|
|
|
|
|
if fun():
|
|
|
|
return True
|
|
|
|
|
|
|
|
time.sleep(0.1)
|
|
|
|
|
|
|
|
|
|
|
|
def check_one_result_row(cursor, query):
|
|
|
|
start_time = time.time()
|
2022-06-09 05:17:44 +08:00
|
|
|
SECONDS = 10
|
2021-07-22 22:22:08 +08:00
|
|
|
|
|
|
|
while True:
|
|
|
|
current_time = time.time()
|
|
|
|
elapsed_time = current_time - start_time
|
|
|
|
|
2022-06-09 05:17:44 +08:00
|
|
|
if elapsed_time > SECONDS:
|
2021-07-22 22:22:08 +08:00
|
|
|
return False
|
|
|
|
|
|
|
|
cursor.execute(query)
|
|
|
|
results = cursor.fetchall()
|
|
|
|
if len(results) < 1:
|
|
|
|
time.sleep(0.1)
|
|
|
|
continue
|
|
|
|
|
|
|
|
return len(results) == 1
|
|
|
|
|
|
|
|
|
2021-11-15 23:23:49 +08:00
|
|
|
def check_vertex_exists_with_properties(cursor, properties):
|
2022-06-09 05:17:44 +08:00
|
|
|
properties_string = ", ".join([f"{k}: {v}" for k, v in properties.items()])
|
2021-10-27 15:06:02 +08:00
|
|
|
assert check_one_result_row(
|
|
|
|
cursor,
|
2022-06-09 05:17:44 +08:00
|
|
|
f"MATCH (n: MESSAGE {{{properties_string}}}) RETURN n",
|
2021-10-27 15:06:02 +08:00
|
|
|
)
|
2021-07-22 22:22:08 +08:00
|
|
|
|
|
|
|
|
|
|
|
def get_stream_info(cursor, stream_name):
|
|
|
|
stream_infos = execute_and_fetch_all(cursor, "SHOW STREAMS")
|
|
|
|
for stream_info in stream_infos:
|
2021-10-27 15:06:02 +08:00
|
|
|
if stream_info[NAME] == stream_name:
|
2021-07-22 22:22:08 +08:00
|
|
|
return stream_info
|
|
|
|
|
|
|
|
return None
|
|
|
|
|
|
|
|
|
|
|
|
def get_is_running(cursor, stream_name):
|
|
|
|
stream_info = get_stream_info(cursor, stream_name)
|
2023-12-21 03:03:06 +08:00
|
|
|
assert stream_info is not None
|
2021-07-22 22:22:08 +08:00
|
|
|
return stream_info[IS_RUNNING]
|
|
|
|
|
|
|
|
|
2023-12-21 03:03:06 +08:00
|
|
|
def create_stream(
|
|
|
|
cursor,
|
|
|
|
stream_name,
|
|
|
|
topics,
|
|
|
|
transformation,
|
|
|
|
consumer_group=None,
|
|
|
|
batch_interval=None,
|
|
|
|
batch_size=None,
|
|
|
|
bootstrap_servers=None,
|
|
|
|
configs=None,
|
|
|
|
credentials=None,
|
|
|
|
):
|
|
|
|
query_str = f"CREATE KAFKA STREAM {stream_name} TOPICS {topics} TRANSFORM {transformation}"
|
|
|
|
if consumer_group is not None:
|
|
|
|
query_str += f" CONSUMER_GROUP {consumer_group}"
|
|
|
|
if batch_interval is not None:
|
|
|
|
query_str += f" BATCH_INTERVAL {batch_interval}"
|
|
|
|
if batch_size is not None:
|
|
|
|
query_str += f" BATCH_SIZE {batch_size}"
|
|
|
|
if bootstrap_servers is not None:
|
|
|
|
query_str += f" BOOTSTRAP_SERVERS {bootstrap_servers}"
|
|
|
|
if configs is not None:
|
|
|
|
query_str += f" CONFIGS {configs}"
|
|
|
|
if credentials is not None:
|
|
|
|
query_str += f" CREDENTIALS {credentials}"
|
|
|
|
execute_and_fetch_all(cursor, query_str)
|
|
|
|
|
|
|
|
|
|
|
|
def start_stream(cursor, stream_name, sleep=True):
|
|
|
|
# Sleep is needed because although is_running returns True,
|
|
|
|
# the stream cannot accept messages yet
|
2021-07-22 22:22:08 +08:00
|
|
|
execute_and_fetch_all(cursor, f"START STREAM {stream_name}")
|
|
|
|
assert get_is_running(cursor, stream_name)
|
2023-12-21 03:03:06 +08:00
|
|
|
if sleep:
|
|
|
|
time.sleep(5)
|
|
|
|
|
|
|
|
|
|
|
|
def start_streams(cursor, stream_names):
|
|
|
|
# Start every stream but don't sleep after each creation
|
|
|
|
for stream_name in stream_names:
|
|
|
|
execute_and_fetch_all(cursor, f"START STREAM {stream_name}")
|
|
|
|
assert get_is_running(cursor, stream_name)
|
|
|
|
time.sleep(5)
|
2021-07-22 22:22:08 +08:00
|
|
|
|
|
|
|
|
2022-06-20 20:09:45 +08:00
|
|
|
def start_stream_with_limit(cursor, stream_name, batch_limit, timeout=None):
|
|
|
|
if timeout is not None:
|
|
|
|
execute_and_fetch_all(cursor, f"START STREAM {stream_name} BATCH_LIMIT {batch_limit} TIMEOUT {timeout} ")
|
|
|
|
else:
|
|
|
|
execute_and_fetch_all(cursor, f"START STREAM {stream_name} BATCH_LIMIT {batch_limit}")
|
|
|
|
|
|
|
|
|
2021-07-22 22:22:08 +08:00
|
|
|
def stop_stream(cursor, stream_name):
|
|
|
|
execute_and_fetch_all(cursor, f"STOP STREAM {stream_name}")
|
|
|
|
assert not get_is_running(cursor, stream_name)
|
|
|
|
|
|
|
|
|
|
|
|
def drop_stream(cursor, stream_name):
|
|
|
|
execute_and_fetch_all(cursor, f"DROP STREAM {stream_name}")
|
|
|
|
assert get_stream_info(cursor, stream_name) is None
|
|
|
|
|
|
|
|
|
2021-11-29 15:56:10 +08:00
|
|
|
def validate_info(actual_stream_info, expected_stream_info):
|
|
|
|
assert len(actual_stream_info) == len(expected_stream_info)
|
|
|
|
for info, expected_info in zip(actual_stream_info, expected_stream_info):
|
|
|
|
assert info == expected_info
|
|
|
|
|
2022-06-09 05:17:44 +08:00
|
|
|
|
2021-07-22 22:22:08 +08:00
|
|
|
def check_stream_info(cursor, stream_name, expected_stream_info):
|
|
|
|
stream_info = get_stream_info(cursor, stream_name)
|
2021-11-29 15:56:10 +08:00
|
|
|
validate_info(stream_info, expected_stream_info)
|
2021-11-15 23:23:49 +08:00
|
|
|
|
2022-06-09 05:17:44 +08:00
|
|
|
|
2021-11-15 23:23:49 +08:00
|
|
|
def kafka_check_vertex_exists_with_topic_and_payload(cursor, topic, payload_bytes):
|
2022-06-09 05:17:44 +08:00
|
|
|
decoded_payload = payload_bytes.decode("utf-8")
|
|
|
|
check_vertex_exists_with_properties(cursor, {"topic": f'"{topic}"', "payload": f'"{decoded_payload}"'})
|
|
|
|
|
2021-11-15 23:23:49 +08:00
|
|
|
|
2022-06-09 05:17:44 +08:00
|
|
|
PULSAR_SERVICE_URL = "pulsar://127.0.0.1:6650"
|
2021-11-15 23:23:49 +08:00
|
|
|
|
2021-11-29 15:56:10 +08:00
|
|
|
|
2021-11-15 23:23:49 +08:00
|
|
|
def pulsar_default_namespace_topic(topic):
|
2022-06-09 05:17:44 +08:00
|
|
|
return f"persistent://public/default/{topic}"
|
2021-11-15 23:23:49 +08:00
|
|
|
|
|
|
|
|
|
|
|
def test_start_and_stop_during_check(
|
2022-06-09 05:17:44 +08:00
|
|
|
operation, connection, stream_creator, message_sender, already_stopped_error, batchSize
|
|
|
|
):
|
2021-11-15 23:23:49 +08:00
|
|
|
# This test is quite complex. The goal is to call START/STOP queries
|
|
|
|
# while a CHECK query is waiting for its result. Because the Global
|
|
|
|
# Interpreter Lock, running queries on multiple threads is not useful,
|
|
|
|
# because only one of them can call Cursor::execute at a time. Therefore
|
|
|
|
# multiple processes are used to execute the queries, because different
|
|
|
|
# processes have different GILs.
|
|
|
|
# The counter variables are thread- and process-safe variables to
|
|
|
|
# synchronize between the different processes. Each value represents a
|
|
|
|
# specific phase of the execution of the processes.
|
|
|
|
assert operation in ["START", "STOP"]
|
2022-06-09 05:17:44 +08:00
|
|
|
assert batchSize == 1
|
2021-11-15 23:23:49 +08:00
|
|
|
cursor = connection.cursor()
|
2022-06-09 05:17:44 +08:00
|
|
|
execute_and_fetch_all(cursor, stream_creator("test_stream"))
|
2021-11-15 23:23:49 +08:00
|
|
|
|
|
|
|
check_counter = Value("i", 0)
|
|
|
|
check_result_len = Value("i", 0)
|
|
|
|
operation_counter = Value("i", 0)
|
|
|
|
|
|
|
|
CHECK_BEFORE_EXECUTE = 1
|
|
|
|
CHECK_AFTER_FETCHALL = 2
|
|
|
|
CHECK_CORRECT_RESULT = 3
|
|
|
|
CHECK_INCORRECT_RESULT = 4
|
|
|
|
|
|
|
|
def call_check(counter, result_len):
|
|
|
|
# This process will call the CHECK query and increment the counter
|
|
|
|
# based on its progress and expected behavior
|
|
|
|
connection = connect()
|
|
|
|
cursor = connection.cursor()
|
|
|
|
counter.value = CHECK_BEFORE_EXECUTE
|
|
|
|
result = execute_and_fetch_all(cursor, "CHECK STREAM test_stream")
|
|
|
|
result_len.value = len(result)
|
|
|
|
counter.value = CHECK_AFTER_FETCHALL
|
2022-06-09 05:17:44 +08:00
|
|
|
if (
|
|
|
|
len(result) > 0 and "payload: 'message'" in result[0][QUERIES][0][QUERY_LITERAL]
|
|
|
|
): # The 0 is only correct because batchSize is 1
|
2021-11-15 23:23:49 +08:00
|
|
|
counter.value = CHECK_CORRECT_RESULT
|
|
|
|
else:
|
|
|
|
counter.value = CHECK_INCORRECT_RESULT
|
|
|
|
|
|
|
|
OP_BEFORE_EXECUTE = 1
|
|
|
|
OP_AFTER_FETCHALL = 2
|
|
|
|
OP_ALREADY_STOPPED_EXCEPTION = 3
|
|
|
|
OP_INCORRECT_ALREADY_STOPPED_EXCEPTION = 4
|
|
|
|
OP_UNEXPECTED_EXCEPTION = 5
|
|
|
|
|
|
|
|
def call_operation(counter):
|
|
|
|
# This porcess will call the query with the specified operation and
|
|
|
|
# increment the counter based on its progress and expected behavior
|
|
|
|
connection = connect()
|
|
|
|
cursor = connection.cursor()
|
|
|
|
counter.value = OP_BEFORE_EXECUTE
|
|
|
|
try:
|
|
|
|
execute_and_fetch_all(cursor, f"{operation} STREAM test_stream")
|
|
|
|
counter.value = OP_AFTER_FETCHALL
|
|
|
|
except mgclient.DatabaseError as e:
|
|
|
|
if already_stopped_error in str(e):
|
|
|
|
counter.value = OP_ALREADY_STOPPED_EXCEPTION
|
|
|
|
else:
|
|
|
|
counter.value = OP_INCORRECT_ALREADY_STOPPED_EXCEPTION
|
|
|
|
except Exception:
|
|
|
|
counter.value = OP_UNEXPECTED_EXCEPTION
|
|
|
|
|
2022-06-09 05:17:44 +08:00
|
|
|
check_stream_proc = Process(target=call_check, daemon=True, args=(check_counter, check_result_len))
|
|
|
|
operation_proc = Process(target=call_operation, daemon=True, args=(operation_counter,))
|
2021-11-15 23:23:49 +08:00
|
|
|
|
|
|
|
try:
|
|
|
|
check_stream_proc.start()
|
|
|
|
|
2023-12-21 03:03:06 +08:00
|
|
|
time.sleep(3)
|
2021-11-15 23:23:49 +08:00
|
|
|
|
|
|
|
assert timed_wait(lambda: check_counter.value == CHECK_BEFORE_EXECUTE)
|
|
|
|
assert timed_wait(lambda: get_is_running(cursor, "test_stream"))
|
2022-06-09 05:17:44 +08:00
|
|
|
assert check_counter.value == CHECK_BEFORE_EXECUTE, "SHOW STREAMS " "was blocked until the end of CHECK STREAM"
|
2021-11-15 23:23:49 +08:00
|
|
|
operation_proc.start()
|
|
|
|
assert timed_wait(lambda: operation_counter.value == OP_BEFORE_EXECUTE)
|
|
|
|
|
|
|
|
message_sender(SIMPLE_MSG)
|
|
|
|
assert timed_wait(lambda: check_counter.value > CHECK_AFTER_FETCHALL)
|
|
|
|
assert check_counter.value == CHECK_CORRECT_RESULT
|
|
|
|
assert check_result_len.value == 1
|
|
|
|
check_stream_proc.join()
|
|
|
|
|
|
|
|
operation_proc.join()
|
|
|
|
if operation == "START":
|
|
|
|
assert operation_counter.value == OP_AFTER_FETCHALL
|
|
|
|
assert get_is_running(cursor, "test_stream")
|
|
|
|
else:
|
|
|
|
assert operation_counter.value == OP_ALREADY_STOPPED_EXCEPTION
|
|
|
|
assert not get_is_running(cursor, "test_stream")
|
|
|
|
|
|
|
|
finally:
|
|
|
|
# to make sure CHECK STREAM finishes
|
|
|
|
message_sender(SIMPLE_MSG)
|
|
|
|
if check_stream_proc.is_alive():
|
|
|
|
check_stream_proc.terminate()
|
|
|
|
if operation_proc.is_alive():
|
|
|
|
operation_proc.terminate()
|
|
|
|
|
2022-06-09 05:17:44 +08:00
|
|
|
|
2021-11-15 23:23:49 +08:00
|
|
|
def test_start_checked_stream_after_timeout(connection, stream_creator):
|
|
|
|
cursor = connection.cursor()
|
2023-12-21 03:03:06 +08:00
|
|
|
stream_name = "test_start_checked_stream_after_timeout"
|
|
|
|
execute_and_fetch_all(cursor, stream_creator(stream_name))
|
2021-11-15 23:23:49 +08:00
|
|
|
|
2023-12-21 03:03:06 +08:00
|
|
|
timeout_in_ms = 2000
|
2021-11-15 23:23:49 +08:00
|
|
|
|
|
|
|
def call_check():
|
2023-12-21 03:03:06 +08:00
|
|
|
execute_and_fetch_all(connect().cursor(), f"CHECK STREAM {stream_name} TIMEOUT {timeout_in_ms}")
|
2021-11-15 23:23:49 +08:00
|
|
|
|
|
|
|
check_stream_proc = Process(target=call_check, daemon=True)
|
|
|
|
|
|
|
|
check_stream_proc.start()
|
2023-12-21 03:03:06 +08:00
|
|
|
assert timed_wait(lambda: get_is_running(cursor, stream_name))
|
|
|
|
start_stream(cursor, stream_name)
|
2021-11-15 23:23:49 +08:00
|
|
|
|
2023-12-21 03:03:06 +08:00
|
|
|
assert get_is_running(cursor, stream_name)
|
|
|
|
stop_stream(cursor, stream_name)
|
2022-06-09 05:17:44 +08:00
|
|
|
|
|
|
|
|
|
|
|
def test_check_stream_same_number_of_queries_than_messages(connection, stream_creator, message_sender):
|
|
|
|
BATCH_SIZE = 2
|
|
|
|
BATCH_LIMIT = 3
|
|
|
|
STREAM_NAME = "test_stream"
|
|
|
|
cursor = connection.cursor()
|
|
|
|
execute_and_fetch_all(cursor, stream_creator(STREAM_NAME, BATCH_SIZE))
|
|
|
|
time.sleep(2)
|
|
|
|
|
|
|
|
test_results = Manager().Namespace()
|
|
|
|
|
|
|
|
def check_stream(stream_name, batch_limit):
|
|
|
|
connection = connect()
|
|
|
|
cursor = connection.cursor()
|
|
|
|
test_results.value = execute_and_fetch_all(cursor, f"CHECK STREAM {stream_name} BATCH_LIMIT {batch_limit} ")
|
|
|
|
|
|
|
|
check_stream_proc = Process(target=check_stream, args=(STREAM_NAME, BATCH_LIMIT))
|
|
|
|
check_stream_proc.start()
|
|
|
|
time.sleep(2)
|
|
|
|
|
|
|
|
MESSAGES = [b"01", b"02", b"03", b"04", b"05", b"06"]
|
|
|
|
for message in MESSAGES:
|
|
|
|
message_sender(message)
|
|
|
|
|
|
|
|
check_stream_proc.join()
|
|
|
|
|
|
|
|
# # Transformation does not do any filtering and simply create queries as "Messages: {contentOfMessage}". Queries should be like:
|
|
|
|
# # -Batch 1: [{parameters: {"value": "Parameter: 01"}, query: "Message: 01"},
|
|
|
|
# # {parameters: {"value": "Parameter: 02"}, query: "Message: 02"}]
|
|
|
|
# # -Batch 2: [{parameters: {"value": "Parameter: 03"}, query: "Message: 03"},
|
|
|
|
# # {parameters: {"value": "Parameter: 04"}, query: "Message: 04"}]
|
|
|
|
# # -Batch 3: [{parameters: {"value": "Parameter: 05"}, query: "Message: 05"},
|
|
|
|
# # {parameters: {"value": "Parameter: 06"}, query: "Message: 06"}]
|
|
|
|
assert len(test_results.value) == BATCH_LIMIT
|
|
|
|
|
|
|
|
expected_queries_and_raw_messages_1 = (
|
|
|
|
[ # queries
|
|
|
|
{PARAMETERS_LITERAL: {"value": "Parameter: 01"}, QUERY_LITERAL: "Message: 01"},
|
|
|
|
{PARAMETERS_LITERAL: {"value": "Parameter: 02"}, QUERY_LITERAL: "Message: 02"},
|
|
|
|
],
|
|
|
|
["01", "02"], # raw message
|
|
|
|
)
|
|
|
|
|
|
|
|
expected_queries_and_raw_messages_2 = (
|
|
|
|
[ # queries
|
|
|
|
{PARAMETERS_LITERAL: {"value": "Parameter: 03"}, QUERY_LITERAL: "Message: 03"},
|
|
|
|
{PARAMETERS_LITERAL: {"value": "Parameter: 04"}, QUERY_LITERAL: "Message: 04"},
|
|
|
|
],
|
|
|
|
["03", "04"], # raw message
|
|
|
|
)
|
|
|
|
|
|
|
|
expected_queries_and_raw_messages_3 = (
|
|
|
|
[ # queries
|
|
|
|
{PARAMETERS_LITERAL: {"value": "Parameter: 05"}, QUERY_LITERAL: "Message: 05"},
|
|
|
|
{PARAMETERS_LITERAL: {"value": "Parameter: 06"}, QUERY_LITERAL: "Message: 06"},
|
|
|
|
],
|
|
|
|
["05", "06"], # raw message
|
|
|
|
)
|
|
|
|
|
|
|
|
assert expected_queries_and_raw_messages_1 == test_results.value[0]
|
|
|
|
assert expected_queries_and_raw_messages_2 == test_results.value[1]
|
|
|
|
assert expected_queries_and_raw_messages_3 == test_results.value[2]
|
|
|
|
|
|
|
|
|
|
|
|
def test_check_stream_different_number_of_queries_than_messages(connection, stream_creator, message_sender):
|
|
|
|
BATCH_SIZE = 2
|
|
|
|
BATCH_LIMIT = 3
|
|
|
|
STREAM_NAME = "test_stream"
|
|
|
|
cursor = connection.cursor()
|
|
|
|
execute_and_fetch_all(cursor, stream_creator(STREAM_NAME, BATCH_SIZE))
|
2023-12-21 03:03:06 +08:00
|
|
|
time.sleep(3)
|
2022-06-09 05:17:44 +08:00
|
|
|
|
|
|
|
results = Manager().Namespace()
|
|
|
|
|
|
|
|
def check_stream(stream_name, batch_limit):
|
|
|
|
connection = connect()
|
|
|
|
cursor = connection.cursor()
|
|
|
|
results.value = execute_and_fetch_all(cursor, f"CHECK STREAM {stream_name} BATCH_LIMIT {batch_limit} ")
|
|
|
|
|
|
|
|
check_stream_proc = Process(target=check_stream, args=(STREAM_NAME, BATCH_LIMIT))
|
|
|
|
check_stream_proc.start()
|
|
|
|
time.sleep(2)
|
|
|
|
|
|
|
|
MESSAGES = [b"a_01", b"a_02", b"03", b"04", b"b_05", b"06"]
|
|
|
|
for message in MESSAGES:
|
|
|
|
message_sender(message)
|
|
|
|
|
|
|
|
check_stream_proc.join()
|
|
|
|
|
|
|
|
# Transformation does some filtering: if message contains "a", it is ignored.
|
|
|
|
# Transformation also has special rule to create query if message is "b": it create more queries.
|
|
|
|
#
|
|
|
|
# Queries should be like:
|
|
|
|
# -Batch 1: []
|
|
|
|
# -Batch 2: [{parameters: {"value": "Parameter: 03"}, query: "Message: 03"},
|
|
|
|
# {parameters: {"value": "Parameter: 04"}, query: "Message: 04"}]
|
|
|
|
# -Batch 3: [{parameters: {"value": "Parameter: 05"}, query: "Message: 05"},
|
|
|
|
# {parameters: {"value": "Parameter: extra_05"}, query: "Message: extra_05"}
|
|
|
|
# {parameters: {"value": "Parameter: 06"}, query: "Message: 06"}]
|
|
|
|
|
|
|
|
assert len(results.value) == BATCH_LIMIT
|
|
|
|
|
|
|
|
expected_queries_and_raw_messages_1 = (
|
|
|
|
[], # queries
|
|
|
|
["a_01", "a_02"], # raw message
|
|
|
|
)
|
|
|
|
|
|
|
|
expected_queries_and_raw_messages_2 = (
|
|
|
|
[ # queries
|
|
|
|
{PARAMETERS_LITERAL: {"value": "Parameter: 03"}, QUERY_LITERAL: "Message: 03"},
|
|
|
|
{PARAMETERS_LITERAL: {"value": "Parameter: 04"}, QUERY_LITERAL: "Message: 04"},
|
|
|
|
],
|
|
|
|
["03", "04"], # raw message
|
|
|
|
)
|
|
|
|
|
|
|
|
expected_queries_and_raw_messages_3 = (
|
|
|
|
[ # queries
|
|
|
|
{PARAMETERS_LITERAL: {"value": "Parameter: b_05"}, QUERY_LITERAL: "Message: b_05"},
|
|
|
|
{
|
|
|
|
PARAMETERS_LITERAL: {"value": "Parameter: extra_b_05"},
|
|
|
|
QUERY_LITERAL: "Message: extra_b_05",
|
|
|
|
},
|
|
|
|
{PARAMETERS_LITERAL: {"value": "Parameter: 06"}, QUERY_LITERAL: "Message: 06"},
|
|
|
|
],
|
|
|
|
["b_05", "06"], # raw message
|
|
|
|
)
|
|
|
|
|
|
|
|
assert expected_queries_and_raw_messages_1 == results.value[0]
|
|
|
|
assert expected_queries_and_raw_messages_2 == results.value[1]
|
|
|
|
assert expected_queries_and_raw_messages_3 == results.value[2]
|
2022-06-20 20:09:45 +08:00
|
|
|
|
|
|
|
|
2023-12-21 03:03:06 +08:00
|
|
|
def test_start_stream_with_batch_limit(connection, stream_name, stream_creator, messages_sender):
|
2022-06-20 20:09:45 +08:00
|
|
|
BATCH_LIMIT = 5
|
2023-12-21 03:03:06 +08:00
|
|
|
TIMEOUT = 10000
|
2022-06-20 20:09:45 +08:00
|
|
|
|
|
|
|
cursor = connection.cursor()
|
2023-12-21 03:03:06 +08:00
|
|
|
execute_and_fetch_all(cursor, stream_creator())
|
|
|
|
results = execute_and_fetch_all(connection.cursor(), "SHOW STREAMS")
|
|
|
|
assert len(results) == 1
|
2022-06-20 20:09:45 +08:00
|
|
|
|
2023-12-21 03:03:06 +08:00
|
|
|
def start_new_stream_with_limit():
|
2022-06-20 20:09:45 +08:00
|
|
|
connection = connect()
|
|
|
|
cursor = connection.cursor()
|
2023-12-21 03:03:06 +08:00
|
|
|
start_stream_with_limit(cursor, stream_name, BATCH_LIMIT, TIMEOUT)
|
2022-06-20 20:09:45 +08:00
|
|
|
|
2022-07-08 16:47:18 +08:00
|
|
|
def is_running():
|
2023-12-21 03:03:06 +08:00
|
|
|
return get_is_running(cursor, stream_name)
|
2022-07-08 16:47:18 +08:00
|
|
|
|
2023-12-21 03:03:06 +08:00
|
|
|
thread_stream_running = Process(target=start_new_stream_with_limit)
|
|
|
|
thread_stream_running.start()
|
2022-06-20 20:09:45 +08:00
|
|
|
|
2023-12-21 03:03:06 +08:00
|
|
|
execute_and_fetch_all(connection.cursor(), "SHOW STREAMS")
|
|
|
|
assert mg_sleep_and_assert(True, is_running)
|
2022-06-20 20:09:45 +08:00
|
|
|
messages_sender(BATCH_LIMIT - 1)
|
|
|
|
|
|
|
|
# We have not sent enough batches to reach the limit. We check that the stream is still correctly running.
|
2023-12-21 03:03:06 +08:00
|
|
|
assert get_is_running(cursor, stream_name)
|
2022-06-20 20:09:45 +08:00
|
|
|
|
|
|
|
# We send a last message to reach the batch_limit
|
|
|
|
messages_sender(1)
|
|
|
|
|
|
|
|
# We check that the stream has correctly stoped.
|
2022-07-08 16:47:18 +08:00
|
|
|
assert not mg_sleep_and_assert(False, is_running)
|
2022-06-20 20:09:45 +08:00
|
|
|
|
|
|
|
|
|
|
|
def test_start_stream_with_batch_limit_timeout(connection, stream_creator):
|
|
|
|
# We check that we get the expected exception when trying to run START STREAM while providing TIMEOUT and not BATCH_LIMIT
|
|
|
|
STREAM_NAME = "test"
|
|
|
|
|
|
|
|
cursor = connection.cursor()
|
|
|
|
execute_and_fetch_all(cursor, stream_creator(STREAM_NAME))
|
|
|
|
|
|
|
|
with pytest.raises(mgclient.DatabaseError):
|
|
|
|
execute_and_fetch_all(cursor, f"START STREAM {STREAM_NAME} TIMEOUT 3000")
|
|
|
|
|
|
|
|
|
|
|
|
def test_start_stream_with_batch_limit_reaching_timeout(connection, stream_creator):
|
|
|
|
# We check that we get the expected exception when running START STREAM while providing TIMEOUT and BATCH_LIMIT
|
|
|
|
STREAM_NAME = "test"
|
|
|
|
BATCH_LIMIT = 5
|
|
|
|
TIMEOUT = 3000
|
|
|
|
TIMEOUT_IN_SECONDS = TIMEOUT / 1000
|
|
|
|
cursor = connection.cursor()
|
|
|
|
execute_and_fetch_all(cursor, stream_creator(STREAM_NAME, BATCH_SIZE))
|
|
|
|
|
|
|
|
start_time = time.time()
|
|
|
|
|
|
|
|
with pytest.raises(mgclient.DatabaseError):
|
|
|
|
execute_and_fetch_all(cursor, f"START STREAM {STREAM_NAME} BATCH_LIMIT {BATCH_LIMIT} TIMEOUT {TIMEOUT}")
|
|
|
|
|
|
|
|
end_time = time.time()
|
|
|
|
assert (
|
|
|
|
end_time - start_time
|
|
|
|
) >= TIMEOUT_IN_SECONDS, "The START STREAM has probably thrown due to something else than timeout!"
|
|
|
|
|
|
|
|
|
|
|
|
def test_start_stream_with_batch_limit_while_check_running(
|
|
|
|
connection, stream_creator, message_sender, setup_function=None
|
|
|
|
):
|
|
|
|
# 1/ We check we get the correct exception calling START STREAM with BATCH_LIMIT while a CHECK STREAM is already running.
|
|
|
|
# 2/ Afterwards, we terminate the CHECK STREAM and start a START STREAM with BATCH_LIMIT
|
|
|
|
def start_check_stream(stream_name, batch_limit, timeout):
|
|
|
|
connection = connect()
|
|
|
|
cursor = connection.cursor()
|
|
|
|
execute_and_fetch_all(cursor, f"CHECK STREAM {stream_name} BATCH_LIMIT {batch_limit} TIMEOUT {timeout}")
|
|
|
|
|
|
|
|
def start_new_stream_with_limit(stream_name, batch_limit, timeout):
|
|
|
|
connection = connect()
|
|
|
|
cursor = connection.cursor()
|
|
|
|
start_stream_with_limit(cursor, stream_name, batch_limit, timeout=timeout)
|
|
|
|
|
|
|
|
STREAM_NAME = "test_check_and_batch_limit"
|
|
|
|
BATCH_LIMIT = 1
|
|
|
|
TIMEOUT = 10000
|
|
|
|
|
|
|
|
cursor = connection.cursor()
|
|
|
|
execute_and_fetch_all(cursor, stream_creator(STREAM_NAME))
|
|
|
|
|
|
|
|
# 0/ Extra setup needed for Kafka to works correctly if Check stream is execute before any messages have been consumed.
|
|
|
|
if setup_function is not None:
|
|
|
|
setup_function(start_check_stream, cursor, STREAM_NAME, BATCH_LIMIT, TIMEOUT)
|
|
|
|
|
|
|
|
# 1/
|
|
|
|
thread_stream_check = Process(target=start_check_stream, daemon=True, args=(STREAM_NAME, BATCH_LIMIT, TIMEOUT))
|
|
|
|
thread_stream_check.start()
|
2022-07-08 16:47:18 +08:00
|
|
|
|
|
|
|
def is_running():
|
|
|
|
return get_is_running(cursor, STREAM_NAME)
|
|
|
|
|
|
|
|
assert mg_sleep_and_assert(True, is_running)
|
2022-06-20 20:09:45 +08:00
|
|
|
|
|
|
|
with pytest.raises(mgclient.DatabaseError):
|
|
|
|
start_stream_with_limit(cursor, STREAM_NAME, BATCH_LIMIT, timeout=TIMEOUT)
|
|
|
|
|
|
|
|
assert get_is_running(cursor, STREAM_NAME)
|
|
|
|
message_sender(SIMPLE_MSG)
|
|
|
|
thread_stream_check.join()
|
|
|
|
|
|
|
|
assert not get_is_running(cursor, STREAM_NAME)
|
|
|
|
|
|
|
|
# 2/
|
|
|
|
thread_stream_running = Process(
|
|
|
|
target=start_new_stream_with_limit, daemon=True, args=(STREAM_NAME, BATCH_LIMIT + 1, TIMEOUT)
|
|
|
|
) # Sending BATCH_LIMIT + 1 messages as BATCH_LIMIT messages have already been sent during the CHECK STREAM (and not consumed)
|
|
|
|
thread_stream_running.start()
|
2022-07-08 16:47:18 +08:00
|
|
|
|
|
|
|
assert mg_sleep_and_assert(True, is_running)
|
2022-06-20 20:09:45 +08:00
|
|
|
|
|
|
|
message_sender(SIMPLE_MSG)
|
|
|
|
|
2022-07-08 16:47:18 +08:00
|
|
|
assert not mg_sleep_and_assert(False, is_running)
|
2022-06-20 20:09:45 +08:00
|
|
|
|
|
|
|
|
|
|
|
def test_check_while_stream_with_batch_limit_running(connection, stream_creator, message_sender):
|
|
|
|
# 1/ We check we get the correct exception calling CHECK STREAM while START STREAM with BATCH_LIMIT is already running
|
|
|
|
# 2/ Afterwards, we terminate the START STREAM with BATCH_LIMIT and start a CHECK STREAM
|
|
|
|
def start_new_stream_with_limit(stream_name, batch_limit, timeout):
|
|
|
|
connection = connect()
|
|
|
|
cursor = connection.cursor()
|
|
|
|
start_stream_with_limit(cursor, stream_name, batch_limit, timeout=timeout)
|
|
|
|
|
|
|
|
def start_check_stream(stream_name, batch_limit, timeout):
|
|
|
|
connection = connect()
|
|
|
|
cursor = connection.cursor()
|
|
|
|
execute_and_fetch_all(cursor, f"CHECK STREAM {stream_name} BATCH_LIMIT {batch_limit} TIMEOUT {timeout}")
|
|
|
|
|
|
|
|
STREAM_NAME = "test_batch_limit_and_check"
|
|
|
|
BATCH_LIMIT = 1
|
|
|
|
TIMEOUT = 10000
|
|
|
|
|
|
|
|
cursor = connection.cursor()
|
|
|
|
execute_and_fetch_all(cursor, stream_creator(STREAM_NAME))
|
|
|
|
|
|
|
|
# 1/
|
|
|
|
thread_stream_running = Process(
|
|
|
|
target=start_new_stream_with_limit, daemon=True, args=(STREAM_NAME, BATCH_LIMIT, TIMEOUT)
|
|
|
|
)
|
|
|
|
start_time = time.time()
|
|
|
|
thread_stream_running.start()
|
2022-07-08 16:47:18 +08:00
|
|
|
|
|
|
|
def is_running():
|
|
|
|
return get_is_running(cursor, STREAM_NAME)
|
|
|
|
|
|
|
|
assert mg_sleep_and_assert(True, is_running)
|
2022-06-20 20:09:45 +08:00
|
|
|
|
|
|
|
with pytest.raises(mgclient.DatabaseError):
|
|
|
|
execute_and_fetch_all(cursor, f"CHECK STREAM {STREAM_NAME} BATCH_LIMIT {BATCH_LIMIT} TIMEOUT {TIMEOUT}")
|
|
|
|
|
|
|
|
end_time = time.time()
|
|
|
|
assert (end_time - start_time) < 0.8 * TIMEOUT, "The CHECK STREAM has probably thrown due to timeout!"
|
|
|
|
|
|
|
|
message_sender(SIMPLE_MSG)
|
|
|
|
|
2022-07-08 16:47:18 +08:00
|
|
|
assert not mg_sleep_and_assert(False, is_running)
|
2022-06-20 20:09:45 +08:00
|
|
|
|
|
|
|
# 2/
|
|
|
|
thread_stream_check = Process(target=start_check_stream, daemon=True, args=(STREAM_NAME, BATCH_LIMIT, TIMEOUT))
|
|
|
|
start_time = time.time()
|
|
|
|
thread_stream_check.start()
|
2022-07-08 16:47:18 +08:00
|
|
|
assert mg_sleep_and_assert(True, is_running)
|
2022-06-20 20:09:45 +08:00
|
|
|
|
|
|
|
message_sender(SIMPLE_MSG)
|
2022-07-08 16:47:18 +08:00
|
|
|
assert not mg_sleep_and_assert(False, is_running)
|
2022-06-20 20:09:45 +08:00
|
|
|
|
|
|
|
|
|
|
|
def test_start_stream_with_batch_limit_with_invalid_batch_limit(connection, stream_creator):
|
|
|
|
# We check that we get a correct exception when giving a negative batch_limit
|
|
|
|
STREAM_NAME = "test_batch_limit_invalid_batch_limit"
|
|
|
|
TIMEOUT = 10000
|
|
|
|
TIMEOUT_IN_SECONDS = TIMEOUT / 1000
|
|
|
|
|
|
|
|
cursor = connection.cursor()
|
|
|
|
execute_and_fetch_all(cursor, stream_creator(STREAM_NAME))
|
|
|
|
time.sleep(2)
|
|
|
|
|
|
|
|
# 1/ checking with batch_limit=-10
|
|
|
|
batch_limit = -10
|
|
|
|
start_time = time.time()
|
|
|
|
|
|
|
|
with pytest.raises(mgclient.DatabaseError):
|
|
|
|
start_stream_with_limit(cursor, STREAM_NAME, batch_limit, timeout=TIMEOUT)
|
|
|
|
|
|
|
|
end_time = time.time()
|
|
|
|
assert (end_time - start_time) < 0.8 * TIMEOUT_IN_SECONDS, "The START STREAM has probably thrown due to timeout!"
|
|
|
|
|
|
|
|
# 2/ checking with batch_limit=0
|
|
|
|
batch_limit = 0
|
|
|
|
start_time = time.time()
|
|
|
|
|
|
|
|
with pytest.raises(mgclient.DatabaseError):
|
|
|
|
start_stream_with_limit(cursor, STREAM_NAME, batch_limit, timeout=TIMEOUT)
|
|
|
|
|
|
|
|
end_time = time.time()
|
|
|
|
assert (end_time - start_time) < 0.8 * TIMEOUT_IN_SECONDS, "The START STREAM has probably thrown due to timeout!"
|
|
|
|
|
|
|
|
|
|
|
|
def test_check_stream_with_batch_limit_with_invalid_batch_limit(connection, stream_creator):
|
|
|
|
# We check that we get a correct exception when giving a negative batch_limit
|
|
|
|
STREAM_NAME = "test_batch_limit_invalid_batch_limit"
|
|
|
|
TIMEOUT = 10000
|
|
|
|
TIMEOUT_IN_SECONDS = TIMEOUT / 1000
|
|
|
|
|
|
|
|
cursor = connection.cursor()
|
|
|
|
execute_and_fetch_all(cursor, stream_creator(STREAM_NAME))
|
2023-12-21 03:03:06 +08:00
|
|
|
time.sleep(3)
|
2022-06-20 20:09:45 +08:00
|
|
|
|
|
|
|
# 1/ checking with batch_limit=-10
|
|
|
|
batch_limit = -10
|
|
|
|
start_time = time.time()
|
|
|
|
|
|
|
|
with pytest.raises(mgclient.DatabaseError):
|
|
|
|
execute_and_fetch_all(cursor, f"CHECK STREAM {STREAM_NAME} BATCH_LIMIT {batch_limit} TIMEOUT {TIMEOUT}")
|
|
|
|
|
|
|
|
end_time = time.time()
|
|
|
|
assert (end_time - start_time) < 0.8 * TIMEOUT_IN_SECONDS, "The CHECK STREAM has probably thrown due to timeout!"
|
|
|
|
|
|
|
|
# 2/ checking with batch_limit=0
|
|
|
|
batch_limit = 0
|
|
|
|
start_time = time.time()
|
|
|
|
|
|
|
|
with pytest.raises(mgclient.DatabaseError):
|
|
|
|
execute_and_fetch_all(cursor, f"CHECK STREAM {STREAM_NAME} BATCH_LIMIT {batch_limit} TIMEOUT {TIMEOUT}")
|
|
|
|
|
|
|
|
end_time = time.time()
|
|
|
|
assert (end_time - start_time) < 0.8 * TIMEOUT_IN_SECONDS, "The CHECK STREAM has probably thrown due to timeout!"
|