Add bigger LDBC dataset to mgbench (#747)

This commit is contained in:
Ante Javor 2023-03-21 21:44:11 +01:00 committed by GitHub
parent 6349fc9501
commit cb813c3070
22 changed files with 3907 additions and 903 deletions


@ -247,7 +247,7 @@ Index queries for each supported vendor can be downloaded from “https://s3.eu-
|Q19|pattern_short| analytical | MATCH (n:User {id: $id})-[e]->(m) RETURN m LIMIT 1|
|Q20|single_edge_write| write | MATCH (n:User {id: $from}), (m:User {id: $to}) WITH n, m CREATE (n)-[e:Temp]->(m) RETURN e|
|Q21|single_vertex_write| write |CREATE (n:UserTemp {id : $id}) RETURN n|
|Q22|single_vertex_property_update| update | MATCH (n:User {id: $id})-[e]->(m) RETURN m LIMIT 1|
|Q22|single_vertex_property_update| update | MATCH (n:User {id: $id}) SET n.property = -1|
|Q23|single_vertex_read| read | MATCH (n:User {id : $id}) RETURN n|
## :computer: Platform

File diff suppressed because it is too large.


@ -0,0 +1,57 @@
# Describes all the information of a single benchmark.py run.
class BenchmarkContext:
"""
Class for holding information on what type of benchmark is being executed
"""
def __init__(
self,
benchmark_target_workload: str = None, # Workload that needs to be executed (dataset/variant/group/query)
vendor_binary: str = None, # Benchmark vendor binary
vendor_name: str = None,
client_binary: str = None,
num_workers_for_import: int = None,
num_workers_for_benchmark: int = None,
single_threaded_runtime_sec: int = 0,
no_load_query_counts: bool = False,
no_save_query_counts: bool = False,
export_results: str = None,
temporary_directory: str = None,
workload_mixed: str = None,  # Default mode is Isolated; set only when running a mixed workload
workload_realistic: str = None,  # Default mode is Isolated; set only when running a realistic workload
time_dependent_execution: int = 0,
warm_up: str = None,
performance_tracking: bool = False,
no_authorization: bool = True,
customer_workloads: str = None,
vendor_args: dict = {},
) -> None:
self.benchmark_target_workload = benchmark_target_workload
self.vendor_binary = vendor_binary
self.vendor_name = vendor_name
self.client_binary = client_binary
self.num_workers_for_import = num_workers_for_import
self.num_workers_for_benchmark = num_workers_for_benchmark
self.single_threaded_runtime_sec = single_threaded_runtime_sec
self.no_load_query_counts = no_load_query_counts
self.no_save_query_counts = no_save_query_counts
self.export_results = export_results
self.temporary_directory = temporary_directory
if workload_mixed != None:
self.mode = "Mixed"
self.mode_config = workload_mixed
elif workload_realistic != None:
self.mode = "Realistic"
self.mode_config = workload_realistic
else:
self.mode = "Isolated"
self.mode_config = "Isolated run does not have a config."
self.time_dependent_execution = time_dependent_execution
self.performance_tracking = performance_tracking
self.warm_up = warm_up
self.no_authorization = no_authorization
self.customer_workloads = customer_workloads
self.vendor_args = vendor_args
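For orientation, a minimal construction sketch follows; the paths and the shape of the mixed-workload value are illustrative assumptions, not taken from this commit.

# Illustrative sketch only: all values and the mixed-workload shape are assumed.
from benchmark_context import BenchmarkContext

context = BenchmarkContext(
    benchmark_target_workload=["pokec/small/basic/*"],  # hypothetical target pattern
    vendor_name="memgraph",
    vendor_binary="/path/to/memgraph",  # hypothetical path
    client_binary="/path/to/client",  # hypothetical path
    num_workers_for_import=4,
    num_workers_for_benchmark=12,
    temporary_directory="/tmp",
    workload_mixed=[100, 30, 30, 30, 10, "Q1"],  # assumed (count, write, read, update, analytical, query)
)
# Because workload_mixed is set, mode becomes "Mixed" and mode_config holds the list above.
assert context.mode == "Mixed"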


@ -289,6 +289,7 @@ void ExecuteTimeDependentWorkload(
// Synchronize workers and collect runtime.
while (ready.load(std::memory_order_acq_rel) < FLAGS_num_workers)
;
run.store(true);
for (int i = 0; i < FLAGS_num_workers; ++i) {
threads[i].join();
@ -310,6 +311,7 @@ void ExecuteTimeDependentWorkload(
final_duration /= FLAGS_num_workers;
double execution_delta = time_limit.count() / final_duration;
// Throughput adjusted for how much longer the workload execution took than the configured time limit.
double throughput = (total_iterations / final_duration) * execution_delta;
double raw_throughput = total_iterations / final_duration;
@ -319,7 +321,6 @@ void ExecuteTimeDependentWorkload(
summary["duration"] = final_duration;
summary["time_limit"] = FLAGS_time_dependent_execution;
summary["queries_executed"] = total_iterations;
summary["throughput"] = throughput;
summary["raw_throughput"] = raw_throughput;
summary["latency_stats"] = LatencyStatistics(worker_query_durations);


@ -77,10 +77,10 @@ def compare_results(results_from, results_to, fields, ignored, different_vendors
recursive_get(summary_from, "database", key, value=None),
summary_to["database"][key],
)
elif summary_to.get("query_statistics") != None and key in summary_to["query_statistics"]:
elif summary_to.get("latency_stats") != None and key in summary_to["latency_stats"]:
row[key] = compute_diff(
recursive_get(summary_from, "query_statistics", key, value=None),
summary_to["query_statistics"][key],
recursive_get(summary_from, "latency_stats", key, value=None),
summary_to["latency_stats"][key],
)
elif not different_vendors:
row[key] = compute_diff(
@ -160,7 +160,10 @@ if __name__ == "__main__":
help="Comparing different vendors, there is no need for metadata, duration, count check.",
)
parser.add_argument(
"--difference-threshold", type=float, help="Difference threshold for memory and throughput, 0.02 = 2% "
"--difference-threshold",
type=float,
default=0.02,
help="Difference threshold for memory and throughput, 0.02 = 2% ",
)
args = parser.parse_args()
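compute_diff itself is not part of this excerpt; as a rough sketch of how a 0.02 (2%) threshold is typically applied to a relative difference (assumed semantics, not the script's actual logic):

# Assumed semantics only, for orientation; not the actual compare_results implementation.
def exceeds_threshold(value_from: float, value_to: float, threshold: float = 0.02) -> bool:
    if value_from == 0:
        return value_to != 0
    return abs(value_to - value_from) / abs(value_from) > threshold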


@ -0,0 +1,500 @@
import argparse
import csv
import sys
from collections import defaultdict
from pathlib import Path
import helpers
# Most recent list of LDBC datasets available at: https://github.com/ldbc/data-sets-surf-repository
INTERACTIVE_LINK = {
"sf0.1": "https://repository.surfsara.nl/datasets/cwi/snb/files/social_network-csv_basic/social_network-csv_basic-sf0.1.tar.zst",
"sf0.3": "https://repository.surfsara.nl/datasets/cwi/snb/files/social_network-csv_basic/social_network-csv_basic-sf0.3.tar.zst",
"sf1": "https://repository.surfsara.nl/datasets/cwi/snb/files/social_network-csv_basic/social_network-csv_basic-sf1.tar.zst",
"sf3": "https://repository.surfsara.nl/datasets/cwi/snb/files/social_network-csv_basic/social_network-csv_basic-sf3.tar.zst",
"sf10": "https://repository.surfsara.nl/datasets/cwi/snb/files/social_network-csv_basic/social_network-csv_basic-sf10.tar.zst",
}
BI_LINK = {
"sf1": "https://pub-383410a98aef4cb686f0c7601eddd25f.r2.dev/bi-pre-audit/bi-sf1-composite-projected-fk.tar.zst",
"sf3": "https://pub-383410a98aef4cb686f0c7601eddd25f.r2.dev/bi-pre-audit/bi-sf3-composite-projected-fk.tar.zst",
"sf10": "https://pub-383410a98aef4cb686f0c7601eddd25f.r2.dev/bi-pre-audit/bi-sf10-composite-projected-fk.tar.zst",
}
if __name__ == "__main__":
parser = argparse.ArgumentParser(
prog="LDBC CSV to CYPHERL converter",
description="""Converts all LDBC CSV files to CYPHERL transactions, for faster Memgraph load""",
)
parser.add_argument(
"--size",
required=True,
choices=["0.1", "0.3", "1", "3", "10"],
help="Interactive: (0.1 , 0.3, 1, 3, 10) BI: (1, 3, 10)",
)
parser.add_argument("--type", required=True, choices=["interactive", "bi"], help="interactive or bi")
args = parser.parse_args()
output_directory = Path().absolute() / ".cache" / "LDBC_generated"
output_directory.mkdir(exist_ok=True)
if args.type == "interactive":
NODES_INTERACTIVE = [
{"filename": "Place", "label": "Place"},
{"filename": "Organisation", "label": "Organisation"},
{"filename": "TagClass", "label": "TagClass"},
{"filename": "Tag", "label": "Tag"},
{"filename": "Comment", "label": "Message:Comment"},
{"filename": "Forum", "label": "Forum"},
{"filename": "Person", "label": "Person"},
{"filename": "Post", "label": "Message:Post"},
]
EDGES_INTERACTIVE = [
{
"filename": "Place_isPartOf_Place",
"source_label": "Place",
"type": "IS_PART_OF",
"target_label": "Place",
},
{
"filename": "TagClass_isSubclassOf_TagClass",
"source_label": "TagClass",
"type": "IS_SUBCLASS_OF",
"target_label": "TagClass",
},
{
"filename": "Organisation_isLocatedIn_Place",
"source_label": "Organisation",
"type": "IS_LOCATED_IN",
"target_label": "Place",
},
{"filename": "Tag_hasType_TagClass", "source_label": "Tag", "type": "HAS_TYPE", "target_label": "TagClass"},
{
"filename": "Comment_hasCreator_Person",
"source_label": "Comment",
"type": "HAS_CREATOR",
"target_label": "Person",
},
{
"filename": "Comment_isLocatedIn_Place",
"source_label": "Comment",
"type": "IS_LOCATED_IN",
"target_label": "Place",
},
{
"filename": "Comment_replyOf_Comment",
"source_label": "Comment",
"type": "REPLY_OF",
"target_label": "Comment",
},
{"filename": "Comment_replyOf_Post", "source_label": "Comment", "type": "REPLY_OF", "target_label": "Post"},
{
"filename": "Forum_containerOf_Post",
"source_label": "Forum",
"type": "CONTAINER_OF",
"target_label": "Post",
},
{
"filename": "Forum_hasMember_Person",
"source_label": "Forum",
"type": "HAS_MEMBER",
"target_label": "Person",
},
{
"filename": "Forum_hasModerator_Person",
"source_label": "Forum",
"type": "HAS_MODERATOR",
"target_label": "Person",
},
{"filename": "Forum_hasTag_Tag", "source_label": "Forum", "type": "HAS_TAG", "target_label": "Tag"},
{
"filename": "Person_hasInterest_Tag",
"source_label": "Person",
"type": "HAS_INTEREST",
"target_label": "Tag",
},
{
"filename": "Person_isLocatedIn_Place",
"source_label": "Person",
"type": "IS_LOCATED_IN",
"target_label": "Place",
},
{"filename": "Person_knows_Person", "source_label": "Person", "type": "KNOWS", "target_label": "Person"},
{"filename": "Person_likes_Comment", "source_label": "Person", "type": "LIKES", "target_label": "Comment"},
{"filename": "Person_likes_Post", "source_label": "Person", "type": "LIKES", "target_label": "Post"},
{
"filename": "Post_hasCreator_Person",
"source_label": "Post",
"type": "HAS_CREATOR",
"target_label": "Person",
},
{"filename": "Comment_hasTag_Tag", "source_label": "Comment", "type": "HAS_TAG", "target_label": "Tag"},
{"filename": "Post_hasTag_Tag", "source_label": "Post", "type": "HAS_TAG", "target_label": "Tag"},
{
"filename": "Post_isLocatedIn_Place",
"source_label": "Post",
"type": "IS_LOCATED_IN",
"target_label": "Place",
},
{
"filename": "Person_studyAt_Organisation",
"source_label": "Person",
"type": "STUDY_AT",
"target_label": "Organisation",
},
{
"filename": "Person_workAt_Organisation",
"source_label": "Person",
"type": "WORK_AT",
"target_label": "Organisation",
},
]
file_size = "sf{}".format(args.size)
out_file = "ldbc_interactive_{}.cypher".format(file_size)
output = output_directory / out_file
if output.exists():
output.unlink()
files_present = None
for file in output_directory.glob("**/*.tar.zst"):
if "basic-" + file_size in file.name:
files_present = file.with_suffix("").with_suffix("")
break
if not files_present:
try:
print("Downloading the file... " + INTERACTIVE_LINK[file_size])
downloaded_file = helpers.download_file(INTERACTIVE_LINK[file_size], output_directory.absolute())
print("Unpacking the file..." + downloaded_file)
files_present = helpers.unpack_tar_zst(Path(downloaded_file))
except:
print("Issue with downloading and unpacking the file, check if links are working properly.")
raise
input_files = {}
for file in files_present.glob("**/*.csv"):
name = file.name.replace("_0_0.csv", "").lower()
input_files[name] = file
for node_file in NODES_INTERACTIVE:
key = node_file["filename"].lower()
default_label = node_file["label"]
query = None
if key in input_files.keys():
with input_files[key].open("r") as input_f, output.open("a") as output_f:
reader = csv.DictReader(input_f, delimiter="|")
for row in reader:
if "type" in row.keys():
label = default_label + ":" + row.pop("type").capitalize()
else:
label = default_label
query = "CREATE (:{} {{id:{}, ".format(label, row.pop("id"))
# Format properties to fit Memgraph
for k, v in row.items():
if k == "creationDate":
row[k] = 'localDateTime("{}")'.format(v[0:-5])
elif k == "birthday":
row[k] = 'date("{}")'.format(v)
elif k == "length":
row[k] = "toInteger({})".format(v)
else:
row[k] = '"{}"'.format(v)
prop_string = ", ".join("{} : {}".format(k, v) for k, v in row.items())
query = query + prop_string + "});"
output_f.write(query + "\n")
print("Converted file: " + input_files[key].name + " to " + output.name)
else:
print("Didn't process node file: " + key)
raise Exception("Didn't find the file that was needed!")
for edge_file in EDGES_INTERACTIVE:
key = edge_file["filename"].lower()
source_label = edge_file["source_label"]
edge_type = edge_file["type"]
target_label = edge_file["target_label"]
if key in input_files.keys():
query = None
with input_files[key].open("r") as input_f, output.open("a") as output_f:
sufixl = ".id"
sufixr = ".id"
# Handle identical label/key in CSV header
if source_label == target_label:
sufixl = "l"
sufixr = "r"
# Consume the header row; explicit field names are supplied to DictReader below
header = next(input_f).strip().split("|")
reader = csv.DictReader(
input_f, delimiter="|", fieldnames=([source_label + sufixl, target_label + sufixr] + header[2:])
)
for row in reader:
query = "MATCH (n1:{} {{id:{}}}), (n2:{} {{id:{}}}) ".format(
source_label, row.pop(source_label + sufixl), target_label, row.pop(target_label + sufixr)
)
for k, v in row.items():
if "date" in k.lower():
# Take time zone out
row[k] = 'localDateTime("{}")'.format(v[0:-5])
elif "workfrom" in k.lower() or "classyear" in k.lower():
row[k] = 'toInteger("{}")'.format(v)
else:
row[k] = '"{}"'.format(v)
edge_part = "CREATE (n1)-[:{}{{".format(edge_type)
prop_string = ", ".join("{} : {}".format(k, v) for k, v in row.items())
query = query + edge_part + prop_string + "}]->(n2);"
output_f.write(query + "\n")
print("Converted file: " + input_files[key].name + " to " + output.name)
else:
print("Didn't process Edge file: " + key)
raise Exception("Didn't find the file that was needed!")
elif args.type == "bi":
NODES_BI = [
{"filename": "Place", "label": "Place"},
{"filename": "Organisation", "label": "Organisation"},
{"filename": "TagClass", "label": "TagClass"},
{"filename": "Tag", "label": "Tag"},
{"filename": "Comment", "label": "Message:Comment"},
{"filename": "Forum", "label": "Forum"},
{"filename": "Person", "label": "Person"},
{"filename": "Post", "label": "Message:Post"},
]
EDGES_BI = [
{
"filename": "Place_isPartOf_Place",
"source_label": "Place",
"type": "IS_PART_OF",
"target_label": "Place",
},
{
"filename": "TagClass_isSubclassOf_TagClass",
"source_label": "TagClass",
"type": "IS_SUBCLASS_OF",
"target_label": "TagClass",
},
{
"filename": "Organisation_isLocatedIn_Place",
"source_label": "Organisation",
"type": "IS_LOCATED_IN",
"target_label": "Place",
},
{"filename": "Tag_hasType_TagClass", "source_label": "Tag", "type": "HAS_TYPE", "target_label": "TagClass"},
{
"filename": "Comment_hasCreator_Person",
"source_label": "Comment",
"type": "HAS_CREATOR",
"target_label": "Person",
},
# Changed place to Country
{
"filename": "Comment_isLocatedIn_Country",
"source_label": "Comment",
"type": "IS_LOCATED_IN",
"target_label": "Country",
},
{
"filename": "Comment_replyOf_Comment",
"source_label": "Comment",
"type": "REPLY_OF",
"target_label": "Comment",
},
{"filename": "Comment_replyOf_Post", "source_label": "Comment", "type": "REPLY_OF", "target_label": "Post"},
{
"filename": "Forum_containerOf_Post",
"source_label": "Forum",
"type": "CONTAINER_OF",
"target_label": "Post",
},
{
"filename": "Forum_hasMember_Person",
"source_label": "Forum",
"type": "HAS_MEMBER",
"target_label": "Person",
},
{
"filename": "Forum_hasModerator_Person",
"source_label": "Forum",
"type": "HAS_MODERATOR",
"target_label": "Person",
},
{"filename": "Forum_hasTag_Tag", "source_label": "Forum", "type": "HAS_TAG", "target_label": "Tag"},
{
"filename": "Person_hasInterest_Tag",
"source_label": "Person",
"type": "HAS_INTEREST",
"target_label": "Tag",
},
# Changed place to City
{
"filename": "Person_isLocatedIn_City",
"source_label": "Person",
"type": "IS_LOCATED_IN",
"target_label": "City",
},
{"filename": "Person_knows_Person", "source_label": "Person", "type": "KNOWS", "target_label": "Person"},
{"filename": "Person_likes_Comment", "source_label": "Person", "type": "LIKES", "target_label": "Comment"},
{"filename": "Person_likes_Post", "source_label": "Person", "type": "LIKES", "target_label": "Post"},
{
"filename": "Post_hasCreator_Person",
"source_label": "Post",
"type": "HAS_CREATOR",
"target_label": "Person",
},
{"filename": "Comment_hasTag_Tag", "source_label": "Comment", "type": "HAS_TAG", "target_label": "Tag"},
{"filename": "Post_hasTag_Tag", "source_label": "Post", "type": "HAS_TAG", "target_label": "Tag"},
# Changed place to Country
{
"filename": "Post_isLocatedIn_Country",
"source_label": "Post",
"type": "IS_LOCATED_IN",
"target_label": "Country",
},
# Changed organisation to University
{
"filename": "Person_studyAt_University",
"source_label": "Person",
"type": "STUDY_AT",
"target_label": "University",
},
# Changed organisation to Company
{
"filename": "Person_workAt_Company",
"source_label": "Person",
"type": "WORK_AT",
"target_label": "Company",
},
]
file_size = "sf{}".format(args.size)
out_file = "ldbc_bi_{}.cypher".format(file_size)
output = output_directory / out_file
if output.exists():
output.unlink()
files_present = None
for file in output_directory.glob("**/*.tar.zst"):
if "bi-" + file_size in file.name:
files_present = file.with_suffix("").with_suffix("")
break
if not files_present:
try:
print("Downloading the file... " + BI_LINK[file_size])
downloaded_file = helpers.download_file(BI_LINK[file_size], output_directory.absolute())
print("Unpacking the file..." + downloaded_file)
files_present = helpers.unpack_tar_zst(Path(downloaded_file))
except:
print("Issue with downloading and unpacking the file, check if links are working properly.")
raise
for file in files_present.glob("**/*.csv.gz"):
if "initial_snapshot" in file.parts:
helpers.unpack_gz(file)
input_files = defaultdict(list)
for file in files_present.glob("**/*.csv"):
key = file.parents[0].name
input_files[key].append(file)
for node_file in NODES_BI:
key = node_file["filename"]
default_label = node_file["label"]
query = None
if key in input_files.keys():
for part_file in input_files[key]:
with part_file.open("r") as input_f, output.open("a") as output_f:
reader = csv.DictReader(input_f, delimiter="|")
for row in reader:
if "type" in row.keys():
label = default_label + ":" + row.pop("type")
else:
label = default_label
query = "CREATE (:{} {{id:{}, ".format(label, row.pop("id"))
# Format properties to fit Memgraph
for k, v in row.items():
if k == "creationDate":
row[k] = 'localDateTime("{}")'.format(v[0:-6])
elif k == "birthday":
row[k] = 'date("{}")'.format(v)
elif k == "length":
row[k] = "toInteger({})".format(v)
else:
row[k] = '"{}"'.format(v)
prop_string = ", ".join("{} : {}".format(k, v) for k, v in row.items())
query = query + prop_string + "});"
output_f.write(query + "\n")
print("Key: " + key + " Converted file: " + part_file.name + " to " + output.name)
else:
print("Didn't process node file: " + key)
for edge_file in EDGES_BI:
key = edge_file["filename"]
source_label = edge_file["source_label"]
edge_type = edge_file["type"]
target_label = edge_file["target_label"]
if key in input_files.keys():
for part_file in input_files[key]:
query = None
with part_file.open("r") as input_f, output.open("a") as output_f:
sufixl = "Id"
sufixr = "Id"
# Handle identical label/key in CSV header
if source_label == target_label:
sufixl = "l"
sufixr = "r"
# Consume the header row; explicit field names are supplied to DictReader below
header = next(input_f).strip().split("|")
if len(header) >= 3:
reader = csv.DictReader(
input_f,
delimiter="|",
fieldnames=(["date", source_label + sufixl, target_label + sufixr] + header[3:]),
)
else:
reader = csv.DictReader(
input_f,
delimiter="|",
fieldnames=([source_label + sufixl, target_label + sufixr] + header[2:]),
)
for row in reader:
query = "MATCH (n1:{} {{id:{}}}), (n2:{} {{id:{}}}) ".format(
source_label,
row.pop(source_label + sufixl),
target_label,
row.pop(target_label + sufixr),
)
for k, v in row.items():
if "date" in k.lower():
# Take time zone out
row[k] = 'localDateTime("{}")'.format(v[0:-6])
elif k == "classYear" or k == "workFrom":
row[k] = 'toInteger("{}")'.format(v)
else:
row[k] = '"{}"'.format(v)
edge_part = "CREATE (n1)-[:{}{{".format(edge_type)
prop_string = ", ".join("{} : {}".format(k, v) for k, v in row.items())
query = query + edge_part + prop_string + "}]->(n2);"
output_f.write(query + "\n")
print("Key: " + key + " Converted file: " + part_file.name + " to " + output.name)
else:
print("Didn't process Edge file: " + key)
raise Exception("Didn't find the file that was needed!")


@ -16,14 +16,20 @@ def parse_arguments():
help="Forward name and paths to vendors binary"
"Example: --vendor memgraph /path/to/binary --vendor neo4j /path/to/binary",
)
parser.add_argument(
"--dataset-size",
default="small",
choices=["small", "medium", "large"],
help="Pick a dataset size (small, medium, large)",
"--dataset-name",
default="",
help="Dataset name you wish to execute",
)
parser.add_argument("--dataset-group", default="basic", help="Select a group of queries")
parser.add_argument(
"--dataset-size",
default="",
help="Pick a dataset variant you wish to execute",
)
parser.add_argument("--dataset-group", default="", help="Select a group of queries")
parser.add_argument(
"--realistic",
@ -53,88 +59,110 @@ def parse_arguments():
return args
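For orientation, the driver script is launched with vendor name/binary pairs plus the new dataset flags defined above; the filename and paths below are placeholders since they are not shown in this excerpt.

# Hypothetical filename and paths; flags match parse_arguments() above.
python3 graph_bench.py \
    --vendor memgraph /path/to/memgraph \
    --dataset-name pokec \
    --dataset-size small \
    --dataset-group basic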
def run_full_benchmarks(vendor, binary, dataset_size, dataset_group, realistic, mixed):
def run_full_benchmarks(vendor, binary, dataset, dataset_size, dataset_group, realistic, mixed):
configurations = [
# Basic full group test cold
# Basic isolated test cold
[
"--export-results",
vendor + "_" + dataset_size + "_cold_isolated.json",
vendor + "_" + dataset + "_" + dataset_size + "_cold_isolated.json",
],
# Basic full group test hot
# Basic isolated test hot
[
"--export-results",
vendor + "_" + dataset_size + "_hot_isolated.json",
"--warmup-run",
vendor + "_" + dataset + "_" + dataset_size + "_hot_isolated.json",
"--warm-up",
"hot",
],
# Basic isolated test vulcanic
[
"--export-results",
vendor + "_" + dataset + "_" + dataset_size + "_vulcanic_isolated.json",
"--warm-up",
"vulcanic",
],
]
# Configurations for full workload
for count, write, read, update, analytical in realistic:
cold = [
"--export-results",
vendor
+ "_"
+ dataset_size
+ "_cold_realistic_{}_{}_{}_{}_{}.json".format(count, write, read, update, analytical),
"--mixed-workload",
count,
write,
read,
update,
analytical,
]
if realistic:
# Configurations for full workload
for count, write, read, update, analytical in realistic:
cold = [
"--export-results",
vendor
+ "_"
+ dataset
+ "_"
+ dataset_size
+ "_cold_realistic_{}_{}_{}_{}_{}.json".format(count, write, read, update, analytical),
"--workload-realistic",
count,
write,
read,
update,
analytical,
]
hot = [
"--export-results",
vendor
+ "_"
+ dataset_size
+ "_hot_realistic_{}_{}_{}_{}_{}.json".format(count, write, read, update, analytical),
"--warmup-run",
"--mixed-workload",
count,
write,
read,
update,
analytical,
]
configurations.append(cold)
configurations.append(hot)
hot = [
"--export-results",
vendor
+ "_"
+ dataset
+ "_"
+ dataset_size
+ "_hot_realistic_{}_{}_{}_{}_{}.json".format(count, write, read, update, analytical),
"--warm-up",
"hot",
"--workload-realistic",
count,
write,
read,
update,
analytical,
]
# Configurations for workload per query
for count, write, read, update, analytical, query in mixed:
cold = [
"--export-results",
vendor
+ "_"
+ dataset_size
+ "_cold_mixed_{}_{}_{}_{}_{}_{}.json".format(count, write, read, update, analytical, query),
"--mixed-workload",
count,
write,
read,
update,
analytical,
query,
]
hot = [
"--export-results",
vendor
+ "_"
+ dataset_size
+ "_hot_mixed_{}_{}_{}_{}_{}_{}.json".format(count, write, read, update, analytical, query),
"--warmup-run",
"--mixed-workload",
count,
write,
read,
update,
analytical,
query,
]
configurations.append(cold)
configurations.append(hot)
configurations.append(cold)
configurations.append(hot)
if mixed:
# Configurations for workload per query
for count, write, read, update, analytical, query in mixed:
cold = [
"--export-results",
vendor
+ "_"
+ dataset
+ "_"
+ dataset_size
+ "_cold_mixed_{}_{}_{}_{}_{}_{}.json".format(count, write, read, update, analytical, query),
"--workload-mixed",
count,
write,
read,
update,
analytical,
query,
]
hot = [
"--export-results",
vendor
+ "_"
+ dataset
+ "_"
+ dataset_size
+ "_hot_mixed_{}_{}_{}_{}_{}_{}.json".format(count, write, read, update, analytical, query),
"--warm-up",
"hot",
"--workload-mixed",
count,
write,
read,
update,
analytical,
query,
]
configurations.append(cold)
configurations.append(hot)
default_args = [
"python3",
@ -146,9 +174,7 @@ def run_full_benchmarks(vendor, binary, dataset_size, dataset_group, realistic,
"--num-workers-for-benchmark",
"12",
"--no-authorization",
"pokec/" + dataset_size + "/" + dataset_group + "/*",
"--tail-latency",
"100",
dataset + "/" + dataset_size + "/" + dataset_group + "/*",
]
for config in configurations:
@ -157,11 +183,11 @@ def run_full_benchmarks(vendor, binary, dataset_size, dataset_group, realistic,
subprocess.run(args=full_config, check=True)
def collect_all_results(vendor_name, dataset_size, dataset_group):
def collect_all_results(vendor_name, dataset, dataset_size, dataset_group):
working_directory = Path().absolute()
print(working_directory)
results = sorted(working_directory.glob(vendor_name + "_" + dataset_size + "_*.json"))
summary = {"pokec": {dataset_size: {dataset_group: {}}}}
results = sorted(working_directory.glob(vendor_name + "_" + dataset + "_" + dataset_size + "_*.json"))
summary = {dataset: {dataset_size: {dataset_group: {}}}}
for file in results:
if "summary" in file.name:
@ -169,19 +195,22 @@ def collect_all_results(vendor_name, dataset_size, dataset_group):
f = file.open()
data = json.loads(f.read())
if data["__run_configuration__"]["condition"] == "hot":
for key, value in data["pokec"][dataset_size][dataset_group].items():
for key, value in data[dataset][dataset_size][dataset_group].items():
key_condition = key + "_hot"
summary["pokec"][dataset_size][dataset_group][key_condition] = value
summary[dataset][dataset_size][dataset_group][key_condition] = value
elif data["__run_configuration__"]["condition"] == "cold":
for key, value in data["pokec"][dataset_size][dataset_group].items():
for key, value in data[dataset][dataset_size][dataset_group].items():
key_condition = key + "_cold"
summary["pokec"][dataset_size][dataset_group][key_condition] = value
summary[dataset][dataset_size][dataset_group][key_condition] = value
elif data["__run_configuration__"]["condition"] == "vulcanic":
for key, value in data[dataset][dataset_size][dataset_group].items():
key_condition = key + "_vulcanic"
summary[dataset][dataset_size][dataset_group][key_condition] = value
print(summary)
json_object = json.dumps(summary, indent=4)
print(json_object)
with open(vendor_name + "_" + dataset_size + "_summary.json", "w") as f:
with open(vendor_name + "_" + dataset + "_" + dataset_size + "_summary.json", "w") as f:
json.dump(summary, f)
@ -194,16 +223,17 @@ if __name__ == "__main__":
vendor_names = {"memgraph", "neo4j"}
for vendor_name, vendor_binary in args.vendor:
path = Path(vendor_binary)
if vendor_name.lower() in vendor_names and (path.is_file() or path.is_dir()):
if vendor_name.lower() in vendor_names and path.is_file():
run_full_benchmarks(
vendor_name,
vendor_binary,
args.dataset_name,
args.dataset_size,
args.dataset_group,
realistic,
mixed,
)
collect_all_results(vendor_name, args.dataset_size, args.dataset_group)
collect_all_results(vendor_name, args.dataset_name, args.dataset_size, args.dataset_group)
else:
raise Exception(
"Check that vendor: {} is supported and you are passing right path: {} to binary.".format(


@ -1,4 +1,4 @@
# Copyright 2021 Memgraph Ltd.
# Copyright 2023 Memgraph Ltd.
#
# Use of this software is governed by the Business Source License
# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
@ -9,11 +9,21 @@
# by the Apache License, Version 2.0, included in the file
# licenses/APL.txt.
import collections
import copy
import fnmatch
import importlib
import inspect
import json
import os
import subprocess
import sys
from pathlib import Path
import workloads
from benchmark_context import BenchmarkContext
from workloads import *
from workloads import base
SCRIPT_DIR = os.path.dirname(os.path.realpath(__file__))
@ -28,22 +38,70 @@ def get_binary_path(path, base=""):
def download_file(url, path):
ret = subprocess.run(["wget", "-nv", "--content-disposition", url],
stderr=subprocess.PIPE, cwd=path, check=True)
ret = subprocess.run(["wget", "-nv", "--content-disposition", url], stderr=subprocess.PIPE, cwd=path, check=True)
data = ret.stderr.decode("utf-8")
tmp = data.split("->")[1]
name = tmp[tmp.index('"') + 1:tmp.rindex('"')]
name = tmp[tmp.index('"') + 1 : tmp.rindex('"')]
return os.path.join(path, name)
def unpack_and_move_file(input_path, output_path):
def unpack_gz_and_move_file(input_path, output_path):
if input_path.endswith(".gz"):
subprocess.run(["gunzip", input_path],
stdout=subprocess.DEVNULL, check=True)
subprocess.run(["gunzip", input_path], stdout=subprocess.DEVNULL, check=True)
input_path = input_path[:-3]
os.rename(input_path, output_path)
def unpack_gz(input_path: Path):
if input_path.suffix == ".gz":
subprocess.run(["gzip", "-d", input_path], capture_output=True, check=True)
input_path = input_path.with_suffix("")
return input_path
def unpack_zip(input_path: Path):
if input_path.suffix == ".zip":
subprocess.run(["unzip", input_path], capture_output=True, check=True, cwd=input_path.parent)
input_path = input_path.with_suffix("")
return input_path
def unpack_tar_zst(input_path: Path):
if input_path.suffix == ".zst":
subprocess.run(
["tar", "--use-compress-program=unzstd", "-xvf", input_path],
cwd=input_path.parent,
capture_output=True,
check=True,
)
input_path = input_path.with_suffix("").with_suffix("")
return input_path
def unpack_tar_gz(input_path: Path):
if input_path.suffix == ".gz":
subprocess.run(
["tar", "-xvf", input_path],
cwd=input_path.parent,
capture_output=True,
check=True,
)
input_path = input_path.with_suffix("").with_suffix("")
return input_path
def unpack_tar_zst_and_move(input_path: Path, output_path: Path):
if input_path.suffix == ".zst":
subprocess.run(
["tar", "--use-compress-program=unzstd", "-xvf", input_path],
cwd=input_path.parent,
capture_output=True,
check=True,
)
input_path = input_path.with_suffix("").with_suffix("")
return input_path.rename(output_path)
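The helpers above are meant to be chained the way the LDBC converter uses them; a minimal sketch (the URL comes from the table earlier, the target directory is invented):

# Minimal sketch of chaining the helpers; the target directory is illustrative.
from pathlib import Path
import helpers

archive = helpers.download_file(
    "https://repository.surfsara.nl/datasets/cwi/snb/files/social_network-csv_basic/social_network-csv_basic-sf1.tar.zst",
    "/tmp/ldbc_cache",
)
# unpack_tar_zst() extracts next to the archive and returns the path with both
# suffixes stripped, e.g. /tmp/ldbc_cache/social_network-csv_basic-sf1
extracted = helpers.unpack_tar_zst(Path(archive))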
def ensure_directory(path):
if not os.path.exists(path):
os.makedirs(path)
@ -51,6 +109,129 @@ def ensure_directory(path):
raise Exception("The path '{}' should be a directory!".format(path))
def get_available_workloads(customer_workloads: str = None) -> dict:
generators = {}
for module in map(workloads.__dict__.get, workloads.__all__):
for key in dir(module):
if key.startswith("_"):
continue
base_class = getattr(module, key)
if not inspect.isclass(base_class) or not issubclass(base_class, base.Workload):
continue
queries = collections.defaultdict(list)
for funcname in dir(base_class):
if not funcname.startswith("benchmark__"):
continue
group, query = funcname.split("__")[1:]
queries[group].append((query, funcname))
generators[base_class.NAME] = (base_class, dict(queries))
if customer_workloads:
head_tail = os.path.split(customer_workloads)
path_without_dataset_name = head_tail[0]
dataset_name = head_tail[1].split(".")[0]
sys.path.append(path_without_dataset_name)
dataset_to_use = importlib.import_module(dataset_name)
for key in dir(dataset_to_use):
if key.startswith("_"):
continue
base_class = getattr(dataset_to_use, key)
if not inspect.isclass(base_class) or not issubclass(base_class, base.Workload):
continue
queries = collections.defaultdict(list)
for funcname in dir(base_class):
if not funcname.startswith("benchmark__"):
continue
group, query = funcname.split("__")[1:]
queries[group].append((query, funcname))
generators[base_class.NAME] = (base_class, dict(queries))
return generators
def list_available_workloads(customer_workloads: str = None):
generators = get_available_workloads(customer_workloads)
for name in sorted(generators.keys()):
print("Dataset:", name)
dataset, queries = generators[name]
print(
" Variants:",
", ".join(dataset.VARIANTS),
"(default: " + dataset.DEFAULT_VARIANT + ")",
)
for group in sorted(queries.keys()):
print(" Group:", group)
for query_name, query_func in queries[group]:
print(" Query:", query_name)
def match_patterns(workload, variant, group, query, is_default_variant, patterns):
for pattern in patterns:
verdict = [fnmatch.fnmatchcase(workload, pattern[0])]
if pattern[1] != "":
verdict.append(fnmatch.fnmatchcase(variant, pattern[1]))
else:
verdict.append(is_default_variant)
verdict.append(fnmatch.fnmatchcase(group, pattern[2]))
verdict.append(fnmatch.fnmatchcase(query, pattern[3]))
if all(verdict):
return True
return False
def filter_workloads(available_workloads: dict, benchmark_context: BenchmarkContext) -> list:
patterns = benchmark_context.benchmark_target_workload
for i in range(len(patterns)):
pattern = patterns[i].split("/")
if len(pattern) > 5 or len(pattern) == 0:
raise Exception("Invalid benchmark description '" + pattern + "'!")
pattern.extend(["", "*", "*"][len(pattern) - 1 :])
patterns[i] = pattern
filtered = []
for workload in sorted(available_workloads.keys()):
generator, queries = available_workloads[workload]
for variant in generator.VARIANTS:
is_default_variant = variant == generator.DEFAULT_VARIANT
current = collections.defaultdict(list)
for group in queries:
for query_name, query_func in queries[group]:
if match_patterns(
workload,
variant,
group,
query_name,
is_default_variant,
patterns,
):
current[group].append((query_name, query_func))
if len(current) == 0:
continue
# Ignore benchgraph "basic" queries in standard CI/CD run
for pattern in patterns:
res = pattern.count("*")
key = "basic"
if res >= 2 and key in current.keys():
current.pop(key)
filtered.append((generator(variant=variant, benchmark_context=benchmark_context), dict(current)))
return filtered
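To illustrate how the benchmark descriptions are padded before matching (examples invented):

# Illustrative expansions of benchmark descriptions, per the padding logic above:
#   "pokec"                                 -> ["pokec", "", "*", "*"]      (empty variant -> default variant)
#   "pokec/small"                           -> ["pokec", "small", "*", "*"]
#   "pokec/small/basic/single_vertex_read"  -> ["pokec", "small", "basic", "single_vertex_read"]
# Patterns containing two or more "*" entries additionally drop the "basic" group,
# as handled in the loop above.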
def parse_kwargs(items):
"""
Parse a series of key-value pairs and return a dictionary
"""
d = {}
if items:
for item in items:
key, value = item.split("=")
d[key] = value
return d
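A short usage sketch; the key names mirror the bolt-port and no-properties-on-edges lookups done by the runners, and note that the values stay strings:

# Illustrative: vendor-specific arguments arrive as "key=value" strings.
vendor_args = parse_kwargs(["bolt-port=7688", "no-properties-on-edges=True"])
# -> {"bolt-port": "7688", "no-properties-on-edges": "True"}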
class Directory:
def __init__(self, path):
self._path = path
@ -103,6 +284,9 @@ class Cache:
ensure_directory(path)
return Directory(path)
def get_default_cache_directory(self):
return self._directory
def load_config(self):
if not os.path.isfile(self._config):
return RecursiveDict()


@ -9,6 +9,8 @@
# by the Apache License, Version 2.0, included in the file
# licenses/APL.txt.
import logging
COLOR_GRAY = 0
COLOR_RED = 1
COLOR_GREEN = 2
@ -16,27 +18,45 @@ COLOR_YELLOW = 3
COLOR_BLUE = 4
COLOR_VIOLET = 5
COLOR_CYAN = 6
COLOR_WHITE = 7
def log(color, *args):
logger = logging.Logger("mgbench_logger")
file_handler = logging.FileHandler("mgbench_logs.log")
file_format = logging.Formatter("%(asctime)s - %(name)s - %(levelname)s - %(message)s")
file_handler.setFormatter(file_format)
logger.addHandler(file_handler)
def _log(color, *args):
print("\033[1;3{}m~~".format(color), *args, "~~\033[0m")
def log(msg):
print(msg)
logger.info(msg=msg)
def init(*args):
log(COLOR_BLUE, *args)
_log(COLOR_BLUE, *args)
logger.info(*args)
def info(*args):
log(COLOR_CYAN, *args)
_log(COLOR_WHITE, *args)
logger.info(*args)
def success(*args):
log(COLOR_GREEN, *args)
_log(COLOR_GREEN, *args)
logger.info(*args)
def warning(*args):
log(COLOR_YELLOW, *args)
_log(COLOR_YELLOW, *args)
logger.warning(*args)
def error(*args):
log(COLOR_RED, *args)
_log(COLOR_RED, *args)
logger.critical(*args)
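Assuming this module is the mgbench log helper imported as log, the reworked functions now mirror console output into mgbench_logs.log; a usage sketch:

# Illustrative usage; every call prints to the console and appends to mgbench_logs.log.
import log

log.init("Starting benchmark run")   # blue banner, logged at INFO
log.info("Importing dataset")        # white banner, logged at INFO
log.warning("Query was retried")     # yellow banner, logged at WARNING
log.error("Client reported errors")  # red banner, logged at CRITICAL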


@ -1,4 +1,4 @@
# Copyright 2022 Memgraph Ltd.
# Copyright 2023 Memgraph Ltd.
#
# Use of this software is governed by the Business Source License
# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
@ -17,10 +17,13 @@ import subprocess
import tempfile
import threading
import time
from abc import ABC, abstractmethod
from pathlib import Path
from benchmark_context import BenchmarkContext
def wait_for_server(port, delay=0.1):
def _wait_for_server(port, delay=0.1):
cmd = ["nc", "-z", "-w", "1", "127.0.0.1", str(port)]
while subprocess.call(cmd) != 0:
time.sleep(0.01)
@ -62,50 +65,165 @@ def _get_current_usage(pid):
return rss / 1024
class Memgraph:
def __init__(self, memgraph_binary, temporary_dir, properties_on_edges, bolt_port, performance_tracking):
self._memgraph_binary = memgraph_binary
self._directory = tempfile.TemporaryDirectory(dir=temporary_dir)
self._properties_on_edges = properties_on_edges
class BaseClient(ABC):
@abstractmethod
def __init__(self, benchmark_context: BenchmarkContext):
self.benchmark_context = benchmark_context
@abstractmethod
def execute(self):
pass
class BoltClient(BaseClient):
def __init__(self, benchmark_context: BenchmarkContext):
self._client_binary = benchmark_context.client_binary
self._directory = tempfile.TemporaryDirectory(dir=benchmark_context.temporary_directory)
self._username = ""
self._password = ""
self._bolt_port = (
benchmark_context.vendor_args["bolt-port"] if "bolt-port" in benchmark_context.vendor_args.keys() else 7687
)
def _get_args(self, **kwargs):
return _convert_args_to_flags(self._client_binary, **kwargs)
def set_credentials(self, username: str, password: str):
self._username = username
self._password = password
def execute(
self,
queries=None,
file_path=None,
num_workers=1,
max_retries: int = 50,
validation: bool = False,
time_dependent_execution: int = 0,
):
if (queries is None and file_path is None) or (queries is not None and file_path is not None):
raise ValueError("Either queries or input_path must be specified!")
queries_json = False
if queries is not None:
queries_json = True
file_path = os.path.join(self._directory.name, "queries.json")
with open(file_path, "w") as f:
for query in queries:
json.dump(query, f)
f.write("\n")
args = self._get_args(
input=file_path,
num_workers=num_workers,
max_retries=max_retries,
queries_json=queries_json,
username=self._username,
password=self._password,
port=self._bolt_port,
validation=validation,
time_dependent_execution=time_dependent_execution,
)
ret = None
try:
ret = subprocess.run(args, capture_output=True)
finally:
error = ret.stderr.decode("utf-8").strip().split("\n")
data = ret.stdout.decode("utf-8").strip().split("\n")
if error and error[0] != "":
print("Reported errros from client")
print(error)
data = [x for x in data if not x.startswith("[")]
return list(map(json.loads, data))
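A usage sketch for the new client wrapper; the per-query format is assumed to be (query, params) pairs, matching what the benchmark__* workload functions return, and context stands for a BenchmarkContext like the one sketched earlier.

# Illustrative only; the (query, params) pair format is an assumption.
client = BoltClient(benchmark_context=context)
results = client.execute(
    queries=[("MATCH (n:User {id: $id}) RETURN n;", {"id": 0})] * 100,
    num_workers=4,
)
# execute() returns the client's JSON output parsed line by line.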
class BaseRunner(ABC):
subclasses = {}
def __init_subclass__(cls, **kwargs) -> None:
super().__init_subclass__(**kwargs)
cls.subclasses[cls.__name__.lower()] = cls
return
@classmethod
def create(cls, benchmark_context: BenchmarkContext):
if benchmark_context.vendor_name not in cls.subclasses:
raise ValueError("Missing runner with name: {}".format(benchmark_context.vendor_name))
return cls.subclasses[benchmark_context.vendor_name](
benchmark_context=benchmark_context,
)
@abstractmethod
def __init__(self, benchmark_context: BenchmarkContext):
self.benchmark_context = benchmark_context
@abstractmethod
def start_benchmark(self):
pass
@abstractmethod
def start_preparation(self):
pass
@abstractmethod
def stop(self):
pass
@abstractmethod
def clean_db(self):
pass
@abstractmethod
def fetch_client(self) -> BaseClient:
pass
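The factory registers every subclass under its lowercased class name, so vendor_name selects the runner; a short sketch using a context like the one above:

# Illustrative: vendor_name in the context decides which registered runner is built.
runner = BaseRunner.create(benchmark_context=context)  # e.g. vendor_name == "memgraph" -> Memgraph
client = runner.fetch_client()                         # BoltClient bound to the same context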
class Memgraph(BaseRunner):
def __init__(self, benchmark_context: BenchmarkContext):
super().__init__(benchmark_context=benchmark_context)
self._memgraph_binary = benchmark_context.vendor_binary
self._performance_tracking = benchmark_context.performance_tracking
self._directory = tempfile.TemporaryDirectory(dir=benchmark_context.temporary_directory)
self._vendor_args = benchmark_context.vendor_args
self._properties_on_edges = (
self._vendor_args["no-properties-on-edges"]
if "no-properties-on-edges" in self._vendor_args.keys()
else False
)
self._bolt_port = self._vendor_args["bolt-port"] if "bolt-port" in self._vendor_args.keys() else 7687
self._proc_mg = None
self._bolt_port = bolt_port
self.performance_tracking = performance_tracking
self._stop_event = threading.Event()
self._rss = []
atexit.register(self._cleanup)
# Determine Memgraph version
ret = subprocess.run([memgraph_binary, "--version"], stdout=subprocess.PIPE, check=True)
ret = subprocess.run([self._memgraph_binary, "--version"], stdout=subprocess.PIPE, check=True)
version = re.search(r"[0-9]+\.[0-9]+\.[0-9]+", ret.stdout.decode("utf-8")).group(0)
self._memgraph_version = tuple(map(int, version.split(".")))
atexit.register(self._cleanup)
def __del__(self):
self._cleanup()
atexit.unregister(self._cleanup)
def _get_args(self, **kwargs):
def _set_args(self, **kwargs):
data_directory = os.path.join(self._directory.name, "memgraph")
kwargs["bolt_port"] = self._bolt_port
if self._memgraph_version >= (0, 50, 0):
kwargs["data_directory"] = data_directory
else:
kwargs["durability_directory"] = data_directory
if self._memgraph_version >= (0, 50, 0):
kwargs["storage_properties_on_edges"] = self._properties_on_edges
else:
assert self._properties_on_edges, "Older versions of Memgraph can't disable properties on edges!"
kwargs["data_directory"] = data_directory
kwargs["storage_properties_on_edges"] = self._properties_on_edges
return _convert_args_to_flags(self._memgraph_binary, **kwargs)
def _start(self, **kwargs):
if self._proc_mg is not None:
raise Exception("The database process is already running!")
args = self._get_args(**kwargs)
args = self._set_args(**kwargs)
self._proc_mg = subprocess.Popen(args, stdout=subprocess.DEVNULL)
time.sleep(0.2)
if self._proc_mg.poll() is not None:
self._proc_mg = None
raise Exception("The database process died prematurely!")
wait_for_server(self._bolt_port)
_wait_for_server(self._bolt_port)
ret = self._proc_mg.poll()
assert ret is None, "The database process died prematurely " "({})!".format(ret)
@ -119,7 +237,7 @@ class Memgraph:
return ret, usage
def start_preparation(self, workload):
if self.performance_tracking:
if self._performance_tracking:
p = threading.Thread(target=self.res_background_tracking, args=(self._rss, self._stop_event))
self._stop_event.clear()
self._rss.clear()
@ -127,13 +245,26 @@ class Memgraph:
self._start(storage_snapshot_on_exit=True)
def start_benchmark(self, workload):
if self.performance_tracking:
if self._performance_tracking:
p = threading.Thread(target=self.res_background_tracking, args=(self._rss, self._stop_event))
self._stop_event.clear()
self._rss.clear()
p.start()
self._start(storage_recover_on_startup=True)
def clean_db(self):
if self._proc_mg is not None:
raise Exception("The database process is already running, cannot clear data it!")
else:
out = subprocess.run(
args="rm -Rf memgraph/snapshots/*",
cwd=self._directory.name,
capture_output=True,
shell=True,
)
print(out.stderr.decode("utf-8"))
print(out.stdout.decode("utf-8"))
def res_background_tracking(self, res, stop_event):
print("Started rss tracking.")
while not stop_event.is_set():
@ -154,35 +285,46 @@ class Memgraph:
f.close()
def stop(self, workload):
if self.performance_tracking:
if self._performance_tracking:
self._stop_event.set()
self.dump_rss(workload)
ret, usage = self._cleanup()
assert ret == 0, "The database process exited with a non-zero " "status ({})!".format(ret)
return usage
def fetch_client(self) -> BoltClient:
return BoltClient(benchmark_context=self.benchmark_context)
class Neo4j:
def __init__(self, neo4j_path, temporary_dir, bolt_port, performance_tracking):
self._neo4j_path = Path(neo4j_path)
self._neo4j_binary = Path(neo4j_path) / "bin" / "neo4j"
self._neo4j_config = Path(neo4j_path) / "conf" / "neo4j.conf"
self._neo4j_pid = Path(neo4j_path) / "run" / "neo4j.pid"
self._neo4j_admin = Path(neo4j_path) / "bin" / "neo4j-admin"
self.performance_tracking = performance_tracking
class Neo4j(BaseRunner):
def __init__(self, benchmark_context: BenchmarkContext):
super().__init__(benchmark_context=benchmark_context)
self._neo4j_binary = Path(benchmark_context.vendor_binary)
self._neo4j_path = Path(benchmark_context.vendor_binary).parents[1]
self._neo4j_config = self._neo4j_path / "conf" / "neo4j.conf"
self._neo4j_pid = self._neo4j_path / "run" / "neo4j.pid"
self._neo4j_admin = self._neo4j_path / "bin" / "neo4j-admin"
self._performance_tracking = benchmark_context.performance_tracking
self._vendor_args = benchmark_context.vendor_args
self._stop_event = threading.Event()
self._rss = []
if not self._neo4j_binary.is_file():
raise Exception("Wrong path to binary!")
self._directory = tempfile.TemporaryDirectory(dir=temporary_dir)
self._bolt_port = bolt_port
tempfile.TemporaryDirectory(dir=benchmark_context.temporary_directory)
self._bolt_port = (
self.benchmark_context.vendor_args["bolt-port"]
if "bolt-port" in self.benchmark_context.vendor_args.keys()
else 7687
)
atexit.register(self._cleanup)
configs = []
memory_flag = "server.jvm.additional=-XX:NativeMemoryTracking=detail"
auth_flag = "dbms.security.auth_enabled=false"
if self.performance_tracking:
bolt_flag = "server.bolt.listen_address=:7687"
http_flag = "server.http.listen_address=:7474"
if self._performance_tracking:
configs.append(memory_flag)
else:
lines = []
@ -201,6 +343,8 @@ class Neo4j:
file.close()
configs.append(auth_flag)
configs.append(bolt_flag)
configs.append(http_flag)
print("Check neo4j config flags:")
for conf in configs:
with self._neo4j_config.open("r+") as file:
@ -234,7 +378,7 @@ class Neo4j:
else:
raise Exception("The database process died prematurely!")
print("Run server check:")
wait_for_server(self._bolt_port)
_wait_for_server(self._bolt_port)
def _cleanup(self):
if self._neo4j_pid.exists():
@ -248,7 +392,7 @@ class Neo4j:
return 0
def start_preparation(self, workload):
if self.performance_tracking:
if self._performance_tracking:
p = threading.Thread(target=self.res_background_tracking, args=(self._rss, self._stop_event))
self._stop_event.clear()
self._rss.clear()
@ -257,11 +401,11 @@ class Neo4j:
# Start DB
self._start()
if self.performance_tracking:
if self._performance_tracking:
self.get_memory_usage("start_" + workload)
def start_benchmark(self, workload):
if self.performance_tracking:
if self._performance_tracking:
p = threading.Thread(target=self.res_background_tracking, args=(self._rss, self._stop_event))
self._stop_event.clear()
self._rss.clear()
@ -269,7 +413,7 @@ class Neo4j:
# Start DB
self._start()
if self.performance_tracking:
if self._performance_tracking:
self.get_memory_usage("start_" + workload)
def dump_db(self, path):
@ -290,6 +434,20 @@ class Neo4j:
check=True,
)
def clean_db(self):
print("Cleaning the database")
if self._neo4j_pid.exists():
raise Exception("Cannot clean DB because it is running.")
else:
out = subprocess.run(
args="rm -Rf data/databases/* data/transactions/*",
cwd=self._neo4j_path,
capture_output=True,
shell=True,
)
print(out.stderr.decode("utf-8"))
print(out.stdout.decode("utf-8"))
def load_db_from_dump(self, path):
print("Loading the neo4j database from dump...")
if self._neo4j_pid.exists():
@ -300,7 +458,8 @@ class Neo4j:
self._neo4j_admin,
"database",
"load",
"--from-path=" + path,
"--from-path",
path,
"--overwrite-destination=true",
"neo4j",
],
@ -325,7 +484,7 @@ class Neo4j:
return True
def stop(self, workload):
if self.performance_tracking:
if self._performance_tracking:
self._stop_event.set()
self.get_memory_usage("stop_" + workload)
self.dump_rss(workload)
@ -360,51 +519,5 @@ class Neo4j:
f.write(memory_usage.stdout)
f.close()
class Client:
def __init__(
self, client_binary: str, temporary_directory: str, bolt_port: int, username: str = "", password: str = ""
):
self._client_binary = client_binary
self._directory = tempfile.TemporaryDirectory(dir=temporary_directory)
self._username = username
self._password = password
self._bolt_port = bolt_port
def _get_args(self, **kwargs):
return _convert_args_to_flags(self._client_binary, **kwargs)
def execute(self, queries=None, file_path=None, num_workers=1):
if (queries is None and file_path is None) or (queries is not None and file_path is not None):
raise ValueError("Either queries or input_path must be specified!")
# TODO: check `file_path.endswith(".json")` to support advanced
# input queries
queries_json = False
if queries is not None:
queries_json = True
file_path = os.path.join(self._directory.name, "queries.json")
with open(file_path, "w") as f:
for query in queries:
json.dump(query, f)
f.write("\n")
args = self._get_args(
input=file_path,
num_workers=num_workers,
queries_json=queries_json,
username=self._username,
password=self._password,
port=self._bolt_port,
)
ret = subprocess.run(args, capture_output=True, check=True)
error = ret.stderr.decode("utf-8").strip().split("\n")
if error and error[0] != "":
print("Reported errros from client")
print(error)
data = ret.stdout.decode("utf-8").strip().split("\n")
data = [x for x in data if not x.startswith("[")]
return list(map(json.loads, data))
def fetch_client(self) -> BoltClient:
return BoltClient(benchmark_context=self.benchmark_context)

tests/mgbench/validation.py (new file, 244 lines)

@ -0,0 +1,244 @@
import argparse
import copy
import multiprocessing
import random
import helpers
import runners
import workloads
from benchmark_context import BenchmarkContext
from workloads import base
def parse_args():
parser = argparse.ArgumentParser(
prog="Validator for individual query checking",
description="""Validates that query is running, and validates output between different vendors""",
)
parser.add_argument(
"benchmarks",
nargs="*",
default="",
help="descriptions of benchmarks that should be run; "
"multiple descriptions can be specified to run multiple "
"benchmarks; the description is specified as "
"dataset/variant/group/query; Unix shell-style wildcards "
"can be used in the descriptions; variant, group and query "
"are optional and they can be left out; the default "
"variant is '' which selects the default dataset variant; "
"the default group is '*' which selects all groups; the"
"default query is '*' which selects all queries",
)
parser.add_argument(
"--vendor-binary-1",
help="Vendor binary used for benchmarking, by default it is memgraph",
default=helpers.get_binary_path("memgraph"),
)
parser.add_argument(
"--vendor-name-1",
default="memgraph",
choices=["memgraph", "neo4j"],
help="Input vendor binary name (memgraph, neo4j)",
)
parser.add_argument(
"--vendor-binary-2",
help="Vendor binary used for benchmarking, by default it is memgraph",
default=helpers.get_binary_path("memgraph"),
)
parser.add_argument(
"--vendor-name-2",
default="memgraph",
choices=["memgraph", "neo4j"],
help="Input vendor binary name (memgraph, neo4j)",
)
parser.add_argument(
"--client-binary",
default=helpers.get_binary_path("tests/mgbench/client"),
help="Client binary used for benchmarking",
)
parser.add_argument(
"--temporary-directory",
default="/tmp",
help="directory path where temporary data should " "be stored",
)
parser.add_argument(
"--num-workers-for-import",
type=int,
default=multiprocessing.cpu_count() // 2,
help="number of workers used to import the dataset",
)
return parser.parse_args()
def get_queries(gen, count):
# Make the generator deterministic.
random.seed(gen.__name__)
# Generate queries.
ret = []
for i in range(count):
ret.append(gen())
return ret
if __name__ == "__main__":
args = parse_args()
benchmark_context_db_1 = BenchmarkContext(
vendor_name=args.vendor_name_1,
vendor_binary=args.vendor_binary_1,
benchmark_target_workload=copy.copy(args.benchmarks),
client_binary=args.client_binary,
num_workers_for_import=args.num_workers_for_import,
temporary_directory=args.temporary_directory,
)
available_workloads = helpers.get_available_workloads()
helpers.list_available_workloads()
vendor_runner = runners.BaseRunner.create(
benchmark_context=benchmark_context_db_1,
)
cache = helpers.Cache()
client = vendor_runner.fetch_client()
workloads = helpers.filter_workloads(
available_workloads=available_workloads, benchmark_context=benchmark_context_db_1
)
results_db_1 = {}
for workload, queries in workloads:
vendor_runner.clean_db()
generated_queries = workload.dataset_generator()
if generated_queries:
vendor_runner.start_preparation("import")
client.execute(queries=generated_queries, num_workers=benchmark_context_db_1.num_workers_for_import)
vendor_runner.stop("import")
else:
workload.prepare(cache.cache_directory("datasets", workload.NAME, workload.get_variant()))
imported = workload.custom_import()
if not imported:
vendor_runner.start_preparation("import")
print("Executing database cleanup and index setup...")
client.execute(
file_path=workload.get_index(), num_workers=benchmark_context_db_1.num_workers_for_import
)
print("Importing dataset...")
ret = client.execute(
file_path=workload.get_file(), num_workers=benchmark_context_db_1.num_workers_for_import
)
usage = vendor_runner.stop("import")
for group in sorted(queries.keys()):
for query, funcname in queries[group]:
print("Running query:{}/{}/{}".format(group, query, funcname))
func = getattr(workload, funcname)
count = 1
vendor_runner.start_benchmark("validation")
try:
ret = client.execute(queries=get_queries(func, count), num_workers=1, validation=True)[0]
results_db_1[funcname] = ret["results"].items()
except Exception as e:
print("Issue running the query" + funcname)
print(e)
results_db_1[funcname] = "Query not executed properly"
finally:
usage = vendor_runner.stop("validation")
print("Database used {:.3f} seconds of CPU time.".format(usage["cpu"]))
print("Database peaked at {:.3f} MiB of memory.".format(usage["memory"] / 1024.0 / 1024.0))
benchmark_context_db_2 = BenchmarkContext(
vendor_name=args.vendor_name_2,
vendor_binary=args.vendor_binary_2,
benchmark_target_workload=copy.copy(args.benchmarks),
client_binary=args.client_binary,
num_workers_for_import=args.num_workers_for_import,
temporary_directory=args.temporary_directory,
)
vendor_runner = runners.BaseRunner.create(
benchmark_context=benchmark_context_db_2,
)
available_workloads = helpers.get_available_workloads()
workloads = helpers.filter_workloads(available_workloads, benchmark_context=benchmark_context_db_2)
client = vendor_runner.fetch_client()
results_db_2 = {}
for workload, queries in workloads:
vendor_runner.clean_db()
generated_queries = workload.dataset_generator()
if generated_queries:
vendor_runner.start_preparation("import")
client.execute(queries=generated_queries, num_workers=benchmark_context_db_2.num_workers_for_import)
vendor_runner.stop("import")
else:
workload.prepare(cache.cache_directory("datasets", workload.NAME, workload.get_variant()))
imported = workload.custom_import()
if not imported:
vendor_runner.start_preparation("import")
print("Executing database cleanup and index setup...")
client.execute(
file_path=workload.get_index(), num_workers=benchmark_context_db_2.num_workers_for_import
)
print("Importing dataset...")
ret = client.execute(
file_path=workload.get_file(), num_workers=benchmark_context_db_2.num_workers_for_import
)
usage = vendor_runner.stop("import")
for group in sorted(queries.keys()):
for query, funcname in queries[group]:
print("Running query:{}/{}/{}".format(group, query, funcname))
func = getattr(workload, funcname)
count = 1
vendor_runner.start_benchmark("validation")
try:
ret = client.execute(queries=get_queries(func, count), num_workers=1, validation=True)[0]
results_db_2[funcname] = ret["results"].items()
except Exception as e:
print("Issue running the query" + funcname)
print(e)
results_db_2[funcname] = "Query not executed properly"
finally:
usage = vendor_runner.stop("validation")
print("Database used {:.3f} seconds of CPU time.".format(usage["cpu"]))
print("Database peaked at {:.3f} MiB of memory.".format(usage["memory"] / 1024.0 / 1024.0))
validation = {}
for key in results_db_1.keys():
if type(results_db_1[key]) is str:
validation[key] = "Query not executed properly."
else:
db_1_values = set()
for index, value in results_db_1[key]:
db_1_values.add(value)
db_2_values = set()
for index, value in results_db_2[key]:
db_2_values.add(value)
if db_1_values == db_2_values:
validation[key] = "Identical results"
else:
validation[key] = "Different results, check manually."
for key, value in validation.items():
print(key + " " + value)


@ -0,0 +1,4 @@
from pathlib import Path
modules = Path(__file__).resolve().parent.glob("*.py")
__all__ = [f.name[:-3] for f in modules if f.is_file() and not f.name == "__init__.py"]


@ -0,0 +1,197 @@
# Copyright 2022 Memgraph Ltd.
#
# Use of this software is governed by the Business Source License
# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
# License, and you may not use this file except in compliance with the Business Source License.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0, included in the file
# licenses/APL.txt.
from abc import ABC, abstractclassmethod
from pathlib import Path
import helpers
from benchmark_context import BenchmarkContext
# Base dataset class used as a template to create each individual dataset. All
# common logic is handled here.
class Workload(ABC):
# Name of the workload/dataset.
NAME = ""
# List of all variants of the workload/dataset that exist.
VARIANTS = ["default"]
# One of the available variants that should be used as the default variant.
DEFAULT_VARIANT = "default"
# List of local files that should be used to import the dataset.
LOCAL_FILE = None
# URLs of remote dataset files that should be used to import the dataset, compressed in gz format.
URL_FILE = None
# Index files
LOCAL_INDEX_FILE = None
URL_INDEX_FILE = None
# Number of vertices/edges for each variant.
SIZES = {
"default": {"vertices": 0, "edges": 0},
}
# Indicates whether the dataset has properties on edges.
PROPERTIES_ON_EDGES = False
def __init_subclass__(cls) -> None:
name_prerequisite = "NAME" in cls.__dict__
generator_prerequisite = "dataset_generator" in cls.__dict__
custom_import_prerequisite = "custom_import" in cls.__dict__
basic_import_prerequisite = ("LOCAL_FILE" in cls.__dict__ or "URL_FILE" in cls.__dict__) and (
"LOCAL_INDEX_FILE" in cls.__dict__ or "URL_INDEX_FILE" in cls.__dict__
)
if not name_prerequisite:
raise ValueError(
"""Can't define a workload class {} without NAME property:
NAME = "dataset name"
Name property defines the workload you want to execute, for example: "demo/*/*/*"
""".format(
cls.__name__
)
)
# Check at class-definition time (not at runtime) that the workload is either in generator mode or in dataset-import mode, not both.
if generator_prerequisite and (custom_import_prerequisite or basic_import_prerequisite):
raise ValueError(
"""
The workload class {} cannot define both a dataset import and a dataset generator at the same time.
""".format(
cls.__name__
)
)
if not generator_prerequisite and (not custom_import_prerequisite and not basic_import_prerequisite):
raise ValueError(
"""
The workload class {} needs to define either a dataset import or a dataset generator.
""".format(
cls.__name__
)
)
return super().__init_subclass__()
def __init__(self, variant: str = None, benchmark_context: BenchmarkContext = None):
"""
Accepts a `variant` variable that indicates which variant
of the dataset should be executed
"""
self.benchmark_context = benchmark_context
self._variant = variant
self._vendor = benchmark_context.vendor_name
self._file = None
self._file_index = None
if self.NAME == "":
raise ValueError("Give your workload a name, by setting self.NAME")
if variant is None:
variant = self.DEFAULT_VARIANT
if variant not in self.VARIANTS:
raise ValueError("Invalid test variant!")
if (self.LOCAL_FILE and variant not in self.LOCAL_FILE) and (self.URL_FILE and variant not in self.URL_FILE):
raise ValueError("The variant doesn't have a defined URL or LOCAL file path!")
if variant not in self.SIZES:
raise ValueError("The variant doesn't have a defined dataset " "size!")
if (self.LOCAL_INDEX_FILE and self._vendor not in self.LOCAL_INDEX_FILE) and (
self.URL_INDEX_FILE and self._vendor not in self.URL_INDEX_FILE
):
raise ValueError("Vendor does not have INDEX for dataset!")
if self.LOCAL_FILE is not None:
self._local_file = self.LOCAL_FILE.get(variant, None)
else:
self._local_file = None
if self.URL_FILE is not None:
self._url_file = self.URL_FILE.get(variant, None)
else:
self._url_file = None
if self.LOCAL_INDEX_FILE is not None:
self._local_index = self.LOCAL_INDEX_FILE.get(self._vendor, None)
else:
self._local_index = None
if self.URL_INDEX_FILE is not None:
self._url_index = self.URL_INDEX_FILE.get(self._vendor, None)
else:
self._url_index = None
self._size = self.SIZES[variant]
if "vertices" in self._size or "edges" in self._size:
self._num_vertices = self._size["vertices"]
self._num_edges = self._size["edges"]
def prepare(self, directory):
if self._local_file is not None:
print("Using local dataset file:", self._local_file)
self._file = self._local_file
elif self._url_file is not None:
cached_input, exists = directory.get_file("dataset.cypher")
if not exists:
print("Downloading dataset file:", self._url_file)
downloaded_file = helpers.download_file(self._url_file, directory.get_path())
print("Unpacking and caching file:", downloaded_file)
helpers.unpack_gz_and_move_file(downloaded_file, cached_input)
print("Using cached dataset file:", cached_input)
self._file = cached_input
if self._local_index is not None:
print("Using local index file:", self._local_index)
self._file_index = self._local_index
elif self._url_index is not None:
cached_index, exists = directory.get_file(self._vendor + ".cypher")
if not exists:
print("Downloading index file:", self._url_index)
downloaded_file = helpers.download_file(self._url_index, directory.get_path())
print("Unpacking and caching file:", downloaded_file)
helpers.unpack_gz_and_move_file(downloaded_file, cached_index)
print("Using cached index file:", cached_index)
self._file_index = cached_index
def get_variant(self):
"""Returns the current variant of the dataset."""
return self._variant
def get_index(self):
"""Get index file, defined by vendor"""
return self._file_index
def get_file(self):
"""
Returns path to the file that contains dataset creation queries.
"""
return self._file
def get_size(self):
"""Returns number of vertices/edges for the current variant."""
return self._size
def custom_import(self) -> bool:
print("Workload does not have a custom import")
return False
def dataset_generator(self) -> list:
print("Workload is not auto generated")
return []
# All tests should be query generator functions that output all of the
# queries that should be executed by the runner. The functions should be
# named `benchmark__GROUPNAME__TESTNAME` and should not accept any
# arguments.
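# For illustration only (a hypothetical workload, not part of this commit), a
# subclass following that convention could look like:
#
#   class Movies(Workload):
#       NAME = "movies"
#       URL_FILE = {"default": "https://example.com/movies.cypher.gz"}  # placeholder URL
#       URL_INDEX_FILE = {"memgraph": "https://example.com/movies_index.cypher"}  # placeholder URL
#       SIZES = {"default": {"vertices": 100, "edges": 200}}
#
#       def benchmark__read__single_vertex(self):
#           return ("MATCH (m:Movie {id: $id}) RETURN m", {"id": 1})
#
# The runner discovers such methods by their benchmark__GROUPNAME__TESTNAME name and
# treats the returned (query, parameters) tuple as one benchmark query.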

View File

@ -0,0 +1,28 @@
import random
from workloads.base import Workload
class Demo(Workload):
NAME = "demo"
def dataset_generator(self):
queries = [("MATCH (n) DETACH DELETE n;", {})]
for i in range(0, 100):
queries.append(("CREATE (:NodeA{{ id:{}}});".format(i), {}))
queries.append(("CREATE (:NodeB{{ id:{}}});".format(i), {}))
for i in range(0, 100):
a = random.randint(0, 99)
b = random.randint(0, 99)
queries.append(("MATCH(a:NodeA{{ id: {}}}),(b:NodeB{{id: {}}}) CREATE (a)-[:EDGE]->(b)".format(a, b), {}))
return queries
def benchmark__test__sample_query1(self):
return ("MATCH (n) RETURN n", {})
def benchmark__test__sample_query2(self):
return ("MATCH (n) RETURN n", {})

View File

@ -0,0 +1,213 @@
import csv
import subprocess
from collections import defaultdict
from pathlib import Path
import helpers
from benchmark_context import BenchmarkContext
from runners import BaseRunner
HEADERS_URL = "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/ldbc/benchmark/bi/headers.tar.gz"
class ImporterLDBCBI:
def __init__(
self, benchmark_context: BenchmarkContext, dataset_name: str, variant: str, index_file: str, csv_dict: dict
) -> None:
self._benchmark_context = benchmark_context
self._dataset_name = dataset_name
self._variant = variant
self._index_file = index_file
self._csv_dict = csv_dict
def execute_import(self):
vendor_runner = BaseRunner.create(
benchmark_context=self._benchmark_context,
)
client = vendor_runner.fetch_client()
if self._benchmark_context.vendor_name == "neo4j":
data_dir = Path() / ".cache" / "datasets" / self._dataset_name / self._variant / "data_neo4j"
data_dir.mkdir(parents=True, exist_ok=True)
dir_name = self._csv_dict[self._variant].split("/")[-1:][0].removesuffix(".tar.zst")
if (data_dir / dir_name).exists():
print("Files downloaded")
data_dir = data_dir / dir_name
else:
print("Downloading files")
downloaded_file = helpers.download_file(self._csv_dict[self._variant], data_dir.absolute())
print("Unpacking the file..." + downloaded_file)
data_dir = helpers.unpack_tar_zst(Path(downloaded_file))
headers_dir = Path() / ".cache" / "datasets" / self._dataset_name / self._variant / "headers_neo4j"
headers_dir.mkdir(parents=True, exist_ok=True)
headers = HEADERS_URL.split("/")[-1:][0].removesuffix(".tar.gz")
if (headers_dir / headers).exists():
print("Header files downloaded.")
else:
print("Downloading files")
downloaded_file = helpers.download_file(HEADERS_URL, headers_dir.absolute())
print("Unpacking the file..." + downloaded_file)
headers_dir = helpers.unpack_tar_gz(Path(downloaded_file))
input_headers = {}
for header_file in headers_dir.glob("**/*.csv"):
key = "/".join(header_file.parts[-2:])[0:-4]
input_headers[key] = header_file.as_posix()
for data_file in data_dir.glob("**/*.gz"):
if "initial_snapshot" in data_file.parts:
data_file = helpers.unpack_gz(data_file)
output = data_file.parent / (data_file.stem + "_neo" + ".csv")
if not output.exists():
with data_file.open("r") as input_f, output.open("a") as output_f:
reader = csv.reader(input_f, delimiter="|")
header = next(reader)
writer = csv.writer(output_f, delimiter="|")
for line in reader:
writer.writerow(line)
else:
print("Files converted")
input_files = defaultdict(list)
for neo_file in data_dir.glob("**/*_neo.csv"):
key = "/".join(neo_file.parts[-3:-1])
input_files[key].append(neo_file.as_posix())
vendor_runner.clean_db()
subprocess.run(
args=[
vendor_runner._neo4j_admin,
"database",
"import",
"full",
"--id-type=INTEGER",
"--ignore-empty-strings=true",
"--bad-tolerance=0",
"--nodes=Place=" + input_headers["static/Place"] + "," + ",".join(input_files["static/Place"]),
"--nodes=Organisation="
+ input_headers["static/Organisation"]
+ ","
+ ",".join(input_files["static/Organisation"]),
"--nodes=TagClass="
+ input_headers["static/TagClass"]
+ ","
+ ",".join(input_files["static/TagClass"]),
"--nodes=Tag=" + input_headers["static/Tag"] + "," + ",".join(input_files["static/Tag"]),
"--nodes=Forum=" + input_headers["dynamic/Forum"] + "," + ",".join(input_files["dynamic/Forum"]),
"--nodes=Person=" + input_headers["dynamic/Person"] + "," + ",".join(input_files["dynamic/Person"]),
"--nodes=Message:Comment="
+ input_headers["dynamic/Comment"]
+ ","
+ ",".join(input_files["dynamic/Comment"]),
"--nodes=Message:Post="
+ input_headers["dynamic/Post"]
+ ","
+ ",".join(input_files["dynamic/Post"]),
"--relationships=IS_PART_OF="
+ input_headers["static/Place_isPartOf_Place"]
+ ","
+ ",".join(input_files["static/Place_isPartOf_Place"]),
"--relationships=IS_SUBCLASS_OF="
+ input_headers["static/TagClass_isSubclassOf_TagClass"]
+ ","
+ ",".join(input_files["static/TagClass_isSubclassOf_TagClass"]),
"--relationships=IS_LOCATED_IN="
+ input_headers["static/Organisation_isLocatedIn_Place"]
+ ","
+ ",".join(input_files["static/Organisation_isLocatedIn_Place"]),
"--relationships=HAS_TYPE="
+ input_headers["static/Tag_hasType_TagClass"]
+ ","
+ ",".join(input_files["static/Tag_hasType_TagClass"]),
"--relationships=HAS_CREATOR="
+ input_headers["dynamic/Comment_hasCreator_Person"]
+ ","
+ ",".join(input_files["dynamic/Comment_hasCreator_Person"]),
"--relationships=IS_LOCATED_IN="
+ input_headers["dynamic/Comment_isLocatedIn_Country"]
+ ","
+ ",".join(input_files["dynamic/Comment_isLocatedIn_Country"]),
"--relationships=REPLY_OF="
+ input_headers["dynamic/Comment_replyOf_Comment"]
+ ","
+ ",".join(input_files["dynamic/Comment_replyOf_Comment"]),
"--relationships=REPLY_OF="
+ input_headers["dynamic/Comment_replyOf_Post"]
+ ","
+ ",".join(input_files["dynamic/Comment_replyOf_Post"]),
"--relationships=CONTAINER_OF="
+ input_headers["dynamic/Forum_containerOf_Post"]
+ ","
+ ",".join(input_files["dynamic/Forum_containerOf_Post"]),
"--relationships=HAS_MEMBER="
+ input_headers["dynamic/Forum_hasMember_Person"]
+ ","
+ ",".join(input_files["dynamic/Forum_hasMember_Person"]),
"--relationships=HAS_MODERATOR="
+ input_headers["dynamic/Forum_hasModerator_Person"]
+ ","
+ ",".join(input_files["dynamic/Forum_hasModerator_Person"]),
"--relationships=HAS_TAG="
+ input_headers["dynamic/Forum_hasTag_Tag"]
+ ","
+ ",".join(input_files["dynamic/Forum_hasTag_Tag"]),
"--relationships=HAS_INTEREST="
+ input_headers["dynamic/Person_hasInterest_Tag"]
+ ","
+ ",".join(input_files["dynamic/Person_hasInterest_Tag"]),
"--relationships=IS_LOCATED_IN="
+ input_headers["dynamic/Person_isLocatedIn_City"]
+ ","
+ ",".join(input_files["dynamic/Person_isLocatedIn_City"]),
"--relationships=KNOWS="
+ input_headers["dynamic/Person_knows_Person"]
+ ","
+ ",".join(input_files["dynamic/Person_knows_Person"]),
"--relationships=LIKES="
+ input_headers["dynamic/Person_likes_Comment"]
+ ","
+ ",".join(input_files["dynamic/Person_likes_Comment"]),
"--relationships=LIKES="
+ input_headers["dynamic/Person_likes_Post"]
+ ","
+ ",".join(input_files["dynamic/Person_likes_Post"]),
"--relationships=HAS_CREATOR="
+ input_headers["dynamic/Post_hasCreator_Person"]
+ ","
+ ",".join(input_files["dynamic/Post_hasCreator_Person"]),
"--relationships=HAS_TAG="
+ input_headers["dynamic/Comment_hasTag_Tag"]
+ ","
+ ",".join(input_files["dynamic/Comment_hasTag_Tag"]),
"--relationships=HAS_TAG="
+ input_headers["dynamic/Post_hasTag_Tag"]
+ ","
+ ",".join(input_files["dynamic/Post_hasTag_Tag"]),
"--relationships=IS_LOCATED_IN="
+ input_headers["dynamic/Post_isLocatedIn_Country"]
+ ","
+ ",".join(input_files["dynamic/Post_isLocatedIn_Country"]),
"--relationships=STUDY_AT="
+ input_headers["dynamic/Person_studyAt_University"]
+ ","
+ ",".join(input_files["dynamic/Person_studyAt_University"]),
"--relationships=WORK_AT="
+ input_headers["dynamic/Person_workAt_Company"]
+ ","
+ ",".join(input_files["dynamic/Person_workAt_Company"]),
"--delimiter",
"|",
"neo4j",
],
check=True,
)
vendor_runner.start_preparation("Index preparation")
print("Executing database index setup")
client.execute(file_path=self._index_file, num_workers=1)
vendor_runner.stop("Stop index preparation")
return True
else:
return False

View File

@ -0,0 +1,163 @@
import csv
import subprocess
from pathlib import Path
import helpers
from benchmark_context import BenchmarkContext
from runners import BaseRunner
# Removed speaks/email from person header
HEADERS_INTERACTIVE = {
"static/organisation": "id:ID(Organisation)|:LABEL|name:STRING|url:STRING",
"static/place": "id:ID(Place)|name:STRING|url:STRING|:LABEL",
"static/tagclass": "id:ID(TagClass)|name:STRING|url:STRING",
"static/tag": "id:ID(Tag)|name:STRING|url:STRING",
"static/tagclass_isSubclassOf_tagclass": ":START_ID(TagClass)|:END_ID(TagClass)",
"static/tag_hasType_tagclass": ":START_ID(Tag)|:END_ID(TagClass)",
"static/organisation_isLocatedIn_place": ":START_ID(Organisation)|:END_ID(Place)",
"static/place_isPartOf_place": ":START_ID(Place)|:END_ID(Place)",
"dynamic/comment": "id:ID(Comment)|creationDate:LOCALDATETIME|locationIP:STRING|browserUsed:STRING|content:STRING|length:INT",
"dynamic/forum": "id:ID(Forum)|title:STRING|creationDate:LOCALDATETIME",
"dynamic/person": "id:ID(Person)|firstName:STRING|lastName:STRING|gender:STRING|birthday:LOCALDATETIME|creationDate:LOCALDATETIME|locationIP:STRING|browserUsed:STRING",
"dynamic/post": "id:ID(Post)|imageFile:STRING|creationDate:LOCALDATETIME|locationIP:STRING|browserUsed:STRING|language:STRING|content:STRING|length:INT",
"dynamic/comment_hasCreator_person": ":START_ID(Comment)|:END_ID(Person)",
"dynamic/comment_isLocatedIn_place": ":START_ID(Comment)|:END_ID(Place)",
"dynamic/comment_replyOf_comment": ":START_ID(Comment)|:END_ID(Comment)",
"dynamic/comment_replyOf_post": ":START_ID(Comment)|:END_ID(Post)",
"dynamic/forum_containerOf_post": ":START_ID(Forum)|:END_ID(Post)",
"dynamic/forum_hasMember_person": ":START_ID(Forum)|:END_ID(Person)|joinDate:LOCALDATETIME",
"dynamic/forum_hasModerator_person": ":START_ID(Forum)|:END_ID(Person)",
"dynamic/forum_hasTag_tag": ":START_ID(Forum)|:END_ID(Tag)",
"dynamic/person_hasInterest_tag": ":START_ID(Person)|:END_ID(Tag)",
"dynamic/person_isLocatedIn_place": ":START_ID(Person)|:END_ID(Place)",
"dynamic/person_knows_person": ":START_ID(Person)|:END_ID(Person)|creationDate:LOCALDATETIME",
"dynamic/person_likes_comment": ":START_ID(Person)|:END_ID(Comment)|creationDate:LOCALDATETIME",
"dynamic/person_likes_post": ":START_ID(Person)|:END_ID(Post)|creationDate:LOCALDATETIME",
"dynamic/person_studyAt_organisation": ":START_ID(Person)|:END_ID(Organisation)|classYear:INT",
"dynamic/person_workAt_organisation": ":START_ID(Person)|:END_ID(Organisation)|workFrom:INT",
"dynamic/post_hasCreator_person": ":START_ID(Post)|:END_ID(Person)",
"dynamic/comment_hasTag_tag": ":START_ID(Comment)|:END_ID(Tag)",
"dynamic/post_hasTag_tag": ":START_ID(Post)|:END_ID(Tag)",
"dynamic/post_isLocatedIn_place": ":START_ID(Post)|:END_ID(Place)",
}
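# Illustration: the "dynamic/person_knows_person" entry above becomes the first row of the
# converted person_knows_person_0_0_neo.csv file, i.e.
#
#   :START_ID(Person)|:END_ID(Person)|creationDate:LOCALDATETIME
#
# so that `neo4j-admin database import` can read node references and property types from the
# header, while the data rows below it keep the original "|" delimiter (with the time zone
# suffix trimmed from creationDate/joinDate values, as done in the conversion loop below).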
class ImporterLDBCInteractive:
def __init__(
self, benchmark_context: BenchmarkContext, dataset_name: str, variant: str, index_file: str, csv_dict: dict
) -> None:
self._benchmark_context = benchmark_context
self._dataset_name = dataset_name
self._variant = variant
self._index_file = index_file
self._csv_dict = csv_dict
def execute_import(self):
vendor_runner = BaseRunner.create(
benchmark_context=self._benchmark_context,
)
client = vendor_runner.fetch_client()
if self._benchmark_context.vendor_name == "neo4j":
print("Runnning Neo4j import")
dump_dir = Path() / ".cache" / "datasets" / self._dataset_name / self._variant / "dump"
dump_dir.mkdir(parents=True, exist_ok=True)
dir_name = self._csv_dict[self._variant].split("/")[-1:][0].removesuffix(".tar.zst")
if (dump_dir / dir_name).exists():
print("Files downloaded")
dump_dir = dump_dir / dir_name
else:
print("Downloading files")
downloaded_file = helpers.download_file(self._csv_dict[self._variant], dump_dir.absolute())
print("Unpacking the file..." + downloaded_file)
dump_dir = helpers.unpack_tar_zst(Path(downloaded_file))
input_files = {}
for file in dump_dir.glob("*/*0.csv"):
parts = file.parts[-2:]
key = parts[0] + "/" + parts[1][:-8]
input_files[key] = file
output_files = {}
for key, file in input_files.items():
output = file.parent / (file.stem + "_neo" + ".csv")
if not output.exists():
with file.open("r") as input_f, output.open("a") as output_f:
reader = csv.reader(input_f, delimiter="|")
header = next(reader)
writer = csv.writer(output_f, delimiter="|")
if key in HEADERS_INTERACTIVE.keys():
updated_header = HEADERS_INTERACTIVE[key].split("|")
writer.writerow(updated_header)
for line in reader:
if "creationDate" in header:
pos = header.index("creationDate")
line[pos] = line[pos][0:-5]
elif "joinDate" in header:
pos = header.index("joinDate")
line[pos] = line[pos][0:-5]
if "organisation_0_0.csv" == file.name:
writer.writerow([line[0], line[1].capitalize(), line[2], line[3]])
elif "place_0_0.csv" == file.name:
writer.writerow([line[0], line[1], line[2], line[3].capitalize()])
else:
writer.writerow(line)
output_files[key] = output.as_posix()
vendor_runner.clean_db()
subprocess.run(
args=[
vendor_runner._neo4j_admin,
"database",
"import",
"full",
"--id-type=INTEGER",
"--nodes=Place=" + output_files["static/place"],
"--nodes=Organisation=" + output_files["static/organisation"],
"--nodes=TagClass=" + output_files["static/tagclass"],
"--nodes=Tag=" + output_files["static/tag"],
"--nodes=Comment:Message=" + output_files["dynamic/comment"],
"--nodes=Forum=" + output_files["dynamic/forum"],
"--nodes=Person=" + output_files["dynamic/person"],
"--nodes=Post:Message=" + output_files["dynamic/post"],
"--relationships=IS_PART_OF=" + output_files["static/place_isPartOf_place"],
"--relationships=IS_SUBCLASS_OF=" + output_files["static/tagclass_isSubclassOf_tagclass"],
"--relationships=IS_LOCATED_IN=" + output_files["static/organisation_isLocatedIn_place"],
"--relationships=HAS_TYPE=" + output_files["static/tag_hasType_tagclass"],
"--relationships=HAS_CREATOR=" + output_files["dynamic/comment_hasCreator_person"],
"--relationships=IS_LOCATED_IN=" + output_files["dynamic/comment_isLocatedIn_place"],
"--relationships=REPLY_OF=" + output_files["dynamic/comment_replyOf_comment"],
"--relationships=REPLY_OF=" + output_files["dynamic/comment_replyOf_post"],
"--relationships=CONTAINER_OF=" + output_files["dynamic/forum_containerOf_post"],
"--relationships=HAS_MEMBER=" + output_files["dynamic/forum_hasMember_person"],
"--relationships=HAS_MODERATOR=" + output_files["dynamic/forum_hasModerator_person"],
"--relationships=HAS_TAG=" + output_files["dynamic/forum_hasTag_tag"],
"--relationships=HAS_INTEREST=" + output_files["dynamic/person_hasInterest_tag"],
"--relationships=IS_LOCATED_IN=" + output_files["dynamic/person_isLocatedIn_place"],
"--relationships=KNOWS=" + output_files["dynamic/person_knows_person"],
"--relationships=LIKES=" + output_files["dynamic/person_likes_comment"],
"--relationships=LIKES=" + output_files["dynamic/person_likes_post"],
"--relationships=HAS_CREATOR=" + output_files["dynamic/post_hasCreator_person"],
"--relationships=HAS_TAG=" + output_files["dynamic/comment_hasTag_tag"],
"--relationships=HAS_TAG=" + output_files["dynamic/post_hasTag_tag"],
"--relationships=IS_LOCATED_IN=" + output_files["dynamic/post_isLocatedIn_place"],
"--relationships=STUDY_AT=" + output_files["dynamic/person_studyAt_organisation"],
"--relationships=WORK_AT=" + output_files["dynamic/person_workAt_organisation"],
"--delimiter",
"|",
"neo4j",
],
check=True,
)
vendor_runner.start_preparation("Index preparation")
print("Executing database index setup")
client.execute(file_path=self._index_file, num_workers=1)
vendor_runner.stop("Stop index preparation")
return True
else:
return False

View File

@ -0,0 +1,41 @@
from pathlib import Path
from benchmark_context import BenchmarkContext
from runners import BaseRunner
class ImporterPokec:
def __init__(
self, benchmark_context: BenchmarkContext, dataset_name: str, variant: str, index_file: str, dataset_file: str
) -> None:
self._benchmark_context = benchmark_context
self._dataset_name = dataset_name
self._variant = variant
self._index_file = index_file
self._dataset_file = dataset_file
def execute_import(self):
if self._benchmark_context.vendor_name == "neo4j":
vendor_runner = BaseRunner.create(
benchmark_context=self._benchmark_context,
)
client = vendor_runner.fetch_client()
vendor_runner.clean_db()
vendor_runner.start_preparation("preparation")
print("Executing database cleanup and index setup...")
client.execute(file_path=self._index_file, num_workers=1)
vendor_runner.stop("preparation")
neo4j_dump = Path() / ".cache" / "datasets" / self._dataset_name / self._variant / "neo4j.dump"
if neo4j_dump.exists():
vendor_runner.load_db_from_dump(path=neo4j_dump.parent)
else:
vendor_runner.start_preparation("import")
print("Importing dataset...")
client.execute(file_path=self._dataset_file, num_workers=self._benchmark_context.num_workers_for_import)
vendor_runner.stop("import")
vendor_runner.dump_db(path=neo4j_dump.parent)
return True
else:
return False

View File

@ -0,0 +1,708 @@
import inspect
import random
from pathlib import Path
import helpers
from benchmark_context import BenchmarkContext
from workloads.base import Workload
from workloads.importers.importer_ldbc_bi import ImporterLDBCBI
class LDBC_BI(Workload):
NAME = "ldbc_bi"
VARIANTS = ["sf1", "sf3", "sf10"]
DEFAULT_VARIANT = "sf1"
URL_FILE = {
"sf1": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/ldbc/benchmark/bi/ldbc_bi_sf1.cypher.gz",
"sf3": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/ldbc/benchmark/bi/ldbc_bi_sf3.cypher.gz",
"sf10": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/ldbc/benchmark/bi/ldbc_bi_sf10.cypher.gz",
}
URL_CSV = {
"sf1": "https://pub-383410a98aef4cb686f0c7601eddd25f.r2.dev/bi-pre-audit/bi-sf1-composite-projected-fk.tar.zst",
"sf3": "https://pub-383410a98aef4cb686f0c7601eddd25f.r2.dev/bi-pre-audit/bi-sf3-composite-projected-fk.tar.zst",
"sf10": "https://pub-383410a98aef4cb686f0c7601eddd25f.r2.dev/bi-pre-audit/bi-sf10-composite-projected-fk.tar.zst",
}
SIZES = {
"sf1": {"vertices": 2997352, "edges": 17196776},
"sf3": {"vertices": 1, "edges": 1},
"sf10": {"vertices": 1, "edges": 1},
}
LOCAL_INDEX_FILE = None
URL_INDEX_FILE = {
"memgraph": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/ldbc/benchmark/bi/memgraph_bi_index.cypher",
"neo4j": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/ldbc/benchmark/bi/neo4j_bi_index.cypher",
}
QUERY_PARAMETERS = {
"sf1": "https://pub-383410a98aef4cb686f0c7601eddd25f.r2.dev/bi-pre-audit/parameters-2022-10-01.zip",
"sf3": "https://pub-383410a98aef4cb686f0c7601eddd25f.r2.dev/bi-pre-audit/parameters-2022-10-01.zip",
"sf10": "https://pub-383410a98aef4cb686f0c7601eddd25f.r2.dev/bi-pre-audit/parameters-2022-10-01.zip",
}
def custom_import(self) -> bool:
importer = ImporterLDBCBI(
benchmark_context=self.benchmark_context,
dataset_name=self.NAME,
variant=self._variant,
index_file=self._file_index,
csv_dict=self.URL_CSV,
)
return importer.execute_import()
def _prepare_parameters_directory(self):
parameters = Path() / ".cache" / "datasets" / self.NAME / self._variant / "parameters"
parameters.mkdir(parents=True, exist_ok=True)
if parameters.exists() and any(parameters.iterdir()):
print("Files downloaded.")
else:
print("Downloading files")
downloaded_file = helpers.download_file(self.QUERY_PARAMETERS[self._variant], parameters.parent.absolute())
print("Unpacking the file..." + downloaded_file)
parameters = helpers.unpack_zip(Path(downloaded_file))
return parameters / ("parameters-" + self._variant)
def _get_query_parameters(self) -> dict:
func_name = inspect.stack()[1].function
parameters = {}
for file in self._parameters_dir.glob("bi-*.csv"):
file_name_query_id = file.name.split("-")[1][0:-4]
func_name_id = func_name.split("_")[-2]
if file_name_query_id == func_name_id or file_name_query_id == func_name_id + "a":
with file.open("r") as input:
lines = input.readlines()
header = lines[0].strip("\n").split("|")
position = random.randint(1, len(lines) - 1)
data = lines[position].strip("\n").split("|")
for i in range(len(header)):
key, value_type = header[i].split(":")
if value_type == "DATETIME":
# Drop time zone
converted = data[i][0:-6]
parameters[key] = converted
elif value_type == "DATE":
converted = data[i] + "T00:00:00"
parameters[key] = converted
elif value_type == "INT":
parameters[key] = int(data[i])
elif value_type == "STRING[]":
elements = data[i].split(";")
parameters[key] = elements
else:
parameters[key] = data[i]
break
return parameters
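# Illustration (file contents are hypothetical): given a parameter file such as
# parameters-sf1/bi-1.csv containing
#
#   datetime:DATETIME
#   2011-12-01T00:00:00.000+00:00
#
# a call from benchmark__bi__query_1_analytical picks a random data row and returns
# {"datetime": "2011-12-01T00:00:00.000"}, i.e. the time zone suffix is dropped so the
# value can be passed as $datetime to both the memgraph and neo4j variants of the query.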
def __init__(self, variant=None, benchmark_context: BenchmarkContext = None):
super().__init__(variant, benchmark_context=benchmark_context)
self._parameters_dir = self._prepare_parameters_directory()
def benchmark__bi__query_1_analytical(self):
memgraph = (
"""
MATCH (message:Message)
WHERE message.creationDate < localDateTime($datetime)
WITH count(message) AS totalMessageCountInt
WITH toFloat(totalMessageCountInt) AS totalMessageCount
MATCH (message:Message)
WHERE message.creationDate < localDateTime($datetime)
AND message.content IS NOT NULL
WITH
totalMessageCount,
message,
message.creationDate.year AS year
WITH
totalMessageCount,
year,
message:Comment AS isComment,
CASE
WHEN message.length < 40 THEN 0
WHEN message.length < 80 THEN 1
WHEN message.length < 160 THEN 2
ELSE 3
END AS lengthCategory,
count(message) AS messageCount,
sum(message.length) / toFloat(count(message)) AS averageMessageLength,
sum(message.length) AS sumMessageLength
RETURN
year,
isComment,
lengthCategory,
messageCount,
averageMessageLength,
sumMessageLength,
messageCount / totalMessageCount AS percentageOfMessages
ORDER BY
year DESC,
isComment ASC,
lengthCategory ASC
""".replace(
"\n", ""
),
self._get_query_parameters(),
)
neo4j = (
"""
MATCH (message:Message)
WHERE message.creationDate < DateTime($datetime)
WITH count(message) AS totalMessageCountInt
WITH toFloat(totalMessageCountInt) AS totalMessageCount
MATCH (message:Message)
WHERE message.creationDate < DateTime($datetime)
AND message.content IS NOT NULL
WITH
totalMessageCount,
message,
message.creationDate.year AS year
WITH
totalMessageCount,
year,
message:Comment AS isComment,
CASE
WHEN message.length < 40 THEN 0
WHEN message.length < 80 THEN 1
WHEN message.length < 160 THEN 2
ELSE 3
END AS lengthCategory,
count(message) AS messageCount,
sum(message.length) / toFloat(count(message)) AS averageMessageLength,
sum(message.length) AS sumMessageLength
RETURN
year,
isComment,
lengthCategory,
messageCount,
averageMessageLength,
sumMessageLength,
messageCount / totalMessageCount AS percentageOfMessages
ORDER BY
year DESC,
isComment ASC,
lengthCategory ASC
""".replace(
"\n", ""
),
self._get_query_parameters(),
)
if self._vendor == "memgraph":
return memgraph
else:
return neo4j
def benchmark__bi__query_2_analytical(self):
memgraph = (
"""
MATCH (tag:Tag)-[:HAS_TYPE]->(:TagClass {name: $tagClass})
OPTIONAL MATCH (message1:Message)-[:HAS_TAG]->(tag)
WHERE localDateTime($date) <= message1.creationDate
AND message1.creationDate < localDateTime($date) + duration({day: 100})
WITH tag, count(message1) AS countWindow1
OPTIONAL MATCH (message2:Message)-[:HAS_TAG]->(tag)
WHERE localDateTime($date) + duration({day: 100}) <= message2.creationDate
AND message2.creationDate < localDateTime($date) + duration({day: 200})
WITH
tag,
countWindow1,
count(message2) AS countWindow2
RETURN
tag.name,
countWindow1,
countWindow2,
abs(countWindow1 - countWindow2) AS diff
ORDER BY
diff DESC,
tag.name ASC
LIMIT 100
""".replace(
"\n", ""
),
self._get_query_parameters(),
)
neo4j = (
"""
MATCH (tag:Tag)-[:HAS_TYPE]->(:TagClass {name: $tagClass})
OPTIONAL MATCH (message1:Message)-[:HAS_TAG]->(tag)
WHERE DateTime($date) <= message1.creationDate
AND message1.creationDate < DateTime($date) + duration({days: 100})
WITH tag, count(message1) AS countWindow1
OPTIONAL MATCH (message2:Message)-[:HAS_TAG]->(tag)
WHERE DateTime($date) + duration({days: 100}) <= message2.creationDate
AND message2.creationDate < DateTime($date) + duration({days: 200})
WITH
tag,
countWindow1,
count(message2) AS countWindow2
RETURN
tag.name,
countWindow1,
countWindow2,
abs(countWindow1 - countWindow2) AS diff
ORDER BY
diff DESC,
tag.name ASC
LIMIT 100
""".replace(
"\n", ""
),
self._get_query_parameters(),
)
if self._vendor == "memgraph":
return memgraph
else:
return neo4j
def benchmark__bi__query_3_analytical(self):
return (
"""
MATCH
(:Country {name: $country})<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]-
(person:Person)<-[:HAS_MODERATOR]-(forum:Forum)-[:CONTAINER_OF]->
(post:Post)<-[:REPLY_OF*0..]-(message:Message)-[:HAS_TAG]->(:Tag)-[:HAS_TYPE]->(:TagClass {name: $tagClass})
RETURN
forum.id as id,
forum.title,
person.id,
count(DISTINCT message) AS messageCount
ORDER BY
messageCount DESC,
id ASC
LIMIT 20
""".replace(
"\n", ""
),
self._get_query_parameters(),
)
def benchmark__bi__query_5_analytical(self):
return (
"""
MATCH (tag:Tag {name: $tag})<-[:HAS_TAG]-(message:Message)-[:HAS_CREATOR]->(person:Person)
OPTIONAL MATCH (message)<-[likes:LIKES]-(:Person)
WITH person, message, count(likes) AS likeCount
OPTIONAL MATCH (message)<-[:REPLY_OF]-(reply:Comment)
WITH person, message, likeCount, count(reply) AS replyCount
WITH person, count(message) AS messageCount, sum(likeCount) AS likeCount, sum(replyCount) AS replyCount
RETURN
person.id,
replyCount,
likeCount,
messageCount,
1*messageCount + 2*replyCount + 10*likeCount AS score
ORDER BY
score DESC,
person.id ASC
LIMIT 100
""".replace(
"\n", ""
),
self._get_query_parameters(),
)
def benchmark__bi__query_6_analytical(self):
return (
"""
MATCH (tag:Tag {name: $tag})<-[:HAS_TAG]-(message1:Message)-[:HAS_CREATOR]->(person1:Person)
OPTIONAL MATCH (message1)<-[:LIKES]-(person2:Person)
OPTIONAL MATCH (person2)<-[:HAS_CREATOR]-(message2:Message)<-[like:LIKES]-(person3:Person)
RETURN
person1.id as id,
count(DISTINCT like) AS authorityScore
ORDER BY
authorityScore DESC,
id ASC
LIMIT 100
""".replace(
"\n", ""
),
self._get_query_parameters(),
)
def benchmark__bi__query_7_analytical(self):
memgraph = (
"""
MATCH
(tag:Tag {name: $tag})<-[:HAS_TAG]-(message:Message),
(message)<-[:REPLY_OF]-(comment:Comment)-[:HAS_TAG]->(relatedTag:Tag)
OPTIONAL MATCH (comment)-[:HAS_TAG]->(tag)
WHERE tag IS NOT NULL
RETURN
relatedTag,
count(DISTINCT comment) AS count
ORDER BY
relatedTag.name ASC,
count DESC
LIMIT 100
""".replace(
"\n", ""
),
self._get_query_parameters(),
)
neo4j = (
"""
MATCH
(tag:Tag {name: $tag})<-[:HAS_TAG]-(message:Message),
(message)<-[:REPLY_OF]-(comment:Comment)-[:HAS_TAG]->(relatedTag:Tag)
WHERE NOT (comment)-[:HAS_TAG]->(tag)
RETURN
relatedTag.name,
count(DISTINCT comment) AS count
ORDER BY
relatedTag.name ASC,
count DESC
LIMIT 100
""".replace(
"\n", ""
),
self._get_query_parameters(),
)
if self._vendor == "memgraph":
return memgraph
else:
return neo4j
def benchmark__bi__query_9_analytical(self):
memgraph = (
"""
MATCH (person:Person)<-[:HAS_CREATOR]-(post:Post)<-[:REPLY_OF*0..]-(reply:Message)
WHERE post.creationDate >= localDateTime($startDate)
AND post.creationDate <= localDateTime($endDate)
AND reply.creationDate >= localDateTime($startDate)
AND reply.creationDate <= localDateTime($endDate)
RETURN
person.id as id,
person.firstName,
person.lastName,
count(DISTINCT post) AS threadCount,
count(DISTINCT reply) AS messageCount
ORDER BY
messageCount DESC,
id ASC
LIMIT 100
""".replace(
"\n", ""
),
self._get_query_parameters(),
)
neo4j = (
"""
MATCH (person:Person)<-[:HAS_CREATOR]-(post:Post)<-[:REPLY_OF*0..]-(reply:Message)
WHERE post.creationDate >= DateTime($startDate)
AND post.creationDate <= DateTime($endDate)
AND reply.creationDate >= DateTime($startDate)
AND reply.creationDate <= DateTime($endDate)
RETURN
person.id as id,
person.firstName,
person.lastName,
count(DISTINCT post) AS threadCount,
count(DISTINCT reply) AS messageCount
ORDER BY
messageCount DESC,
id ASC
LIMIT 100
""".replace(
"\n", ""
),
self._get_query_parameters(),
)
if self._vendor == "memgraph":
return memgraph
else:
return neo4j
def benchmark__bi__query_11_analytical(self):
return (
"""
MATCH (a:Person)-[:IS_LOCATED_IN]->(:City)-[:IS_PART_OF]->(country:Country {name: $country}),
(a)-[k1:KNOWS]-(b:Person)
WHERE a.id < b.id
AND localDateTime($startDate) <= k1.creationDate AND k1.creationDate <= localDateTime($endDate)
WITH DISTINCT country, a, b
MATCH (b)-[:IS_LOCATED_IN]->(:City)-[:IS_PART_OF]->(country)
WITH DISTINCT country, a, b
MATCH (b)-[k2:KNOWS]-(c:Person),
(c)-[:IS_LOCATED_IN]->(:City)-[:IS_PART_OF]->(country)
WHERE b.id < c.id
AND localDateTime($startDate) <= k2.creationDate AND k2.creationDate <= localDateTime($endDate)
WITH DISTINCT a, b, c
MATCH (c)-[k3:KNOWS]-(a)
WHERE localDateTime($startDate) <= k3.creationDate AND k3.creationDate <= localDateTime($endDate)
WITH DISTINCT a, b, c
RETURN count(*) AS count
""".replace(
"\n", ""
),
self._get_query_parameters(),
)
def benchmark__bi__query_12_analytical(self):
return (
"""
MATCH (person:Person)
OPTIONAL MATCH (person)<-[:HAS_CREATOR]-(message:Message)-[:REPLY_OF*0..]->(post:Post)
WHERE message.content IS NOT NULL
AND message.length < $lengthThreshold
AND message.creationDate > localDateTime($startDate)
AND post.language IN $languages
WITH
person,
count(message) AS messageCount
RETURN
messageCount,
count(person) AS personCount
ORDER BY
personCount DESC,
messageCount DESC
""".replace(
"\n", ""
),
self._get_query_parameters(),
)
def benchmark__bi__query_13_analytical(self):
memgraph = (
"""
MATCH (country:Country {name: $country})<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]-(zombie:Person)
WHERE zombie.creationDate < localDateTime($endDate)
WITH country, zombie
OPTIONAL MATCH (zombie)<-[:HAS_CREATOR]-(message:Message)
WHERE message.creationDate < localDateTime($endDate)
WITH
country,
zombie,
count(message) AS messageCount
WITH
country,
zombie,
12 * (localDateTime($endDate).year - zombie.creationDate.year )
+ (localDateTime($endDate).month - zombie.creationDate.month)
+ 1 AS months,
messageCount
WHERE messageCount / months < 1
WITH
country,
collect(zombie) AS zombies
UNWIND zombies AS zombie
OPTIONAL MATCH
(zombie)<-[:HAS_CREATOR]-(message:Message)<-[:LIKES]-(likerZombie:Person)
WHERE likerZombie IN zombies
WITH
zombie,
count(likerZombie) AS zombieLikeCount
OPTIONAL MATCH
(zombie)<-[:HAS_CREATOR]-(message:Message)<-[:LIKES]-(likerPerson:Person)
WHERE likerPerson.creationDate < localDateTime($endDate)
WITH
zombie,
zombieLikeCount,
count(likerPerson) AS totalLikeCount
RETURN
zombie.id,
zombieLikeCount,
totalLikeCount,
CASE totalLikeCount
WHEN 0 THEN 0.0
ELSE zombieLikeCount / toFloat(totalLikeCount)
END AS zombieScore
ORDER BY
zombieScore DESC,
zombie.id ASC
LIMIT 100
""".replace(
"\n", ""
),
self._get_query_parameters(),
)
neo4j = (
"""
MATCH (country:Country {name: $country})<-[:IS_PART_OF]-(:City)<-[:IS_LOCATED_IN]-(zombie:Person)
WHERE zombie.creationDate < DateTime($endDate)
WITH country, zombie
OPTIONAL MATCH (zombie)<-[:HAS_CREATOR]-(message:Message)
WHERE message.creationDate < DateTime($endDate)
WITH
country,
zombie,
count(message) AS messageCount
WITH
country,
zombie,
12 * (DateTime($endDate).year - zombie.creationDate.year )
+ (DateTime($endDate).month - zombie.creationDate.month)
+ 1 AS months,
messageCount
WHERE messageCount / months < 1
WITH
country,
collect(zombie) AS zombies
UNWIND zombies AS zombie
OPTIONAL MATCH
(zombie)<-[:HAS_CREATOR]-(message:Message)<-[:LIKES]-(likerZombie:Person)
WHERE likerZombie IN zombies
WITH
zombie,
count(likerZombie) AS zombieLikeCount
OPTIONAL MATCH
(zombie)<-[:HAS_CREATOR]-(message:Message)<-[:LIKES]-(likerPerson:Person)
WHERE likerPerson.creationDate < DateTime($endDate)
WITH
zombie,
zombieLikeCount,
count(likerPerson) AS totalLikeCount
RETURN
zombie.id,
zombieLikeCount,
totalLikeCount,
CASE totalLikeCount
WHEN 0 THEN 0.0
ELSE zombieLikeCount / toFloat(totalLikeCount)
END AS zombieScore
ORDER BY
zombieScore DESC,
zombie.id ASC
LIMIT 100
""".replace(
"\n", ""
),
self._get_query_parameters(),
)
if self._vendor == "memgraph":
return memgraph
else:
return neo4j
def benchmark__bi__query_14_analytical(self):
return (
"""
MATCH
(country1:Country {name: $country1})<-[:IS_PART_OF]-(city1:City)<-[:IS_LOCATED_IN]-(person1:Person),
(country2:Country {name: $country2})<-[:IS_PART_OF]-(city2:City)<-[:IS_LOCATED_IN]-(person2:Person),
(person1)-[:KNOWS]-(person2)
WITH person1, person2, city1, 0 AS score
OPTIONAL MATCH (person1)<-[:HAS_CREATOR]-(c:Comment)-[:REPLY_OF]->(:Message)-[:HAS_CREATOR]->(person2)
WITH DISTINCT person1, person2, city1, score + (CASE c WHEN null THEN 0 ELSE 4 END) AS score
OPTIONAL MATCH (person1)<-[:HAS_CREATOR]-(m:Message)<-[:REPLY_OF]-(:Comment)-[:HAS_CREATOR]->(person2)
WITH DISTINCT person1, person2, city1, score + (CASE m WHEN null THEN 0 ELSE 1 END) AS score
OPTIONAL MATCH (person1)-[:LIKES]->(m:Message)-[:HAS_CREATOR]->(person2)
WITH DISTINCT person1, person2, city1, score + (CASE m WHEN null THEN 0 ELSE 10 END) AS score
OPTIONAL MATCH (person1)<-[:HAS_CREATOR]-(m:Message)<-[:LIKES]-(person2)
WITH DISTINCT person1, person2, city1, score + (CASE m WHEN null THEN 0 ELSE 1 END) AS score
ORDER BY
city1.name ASC,
score DESC,
person1.id ASC,
person2.id ASC
WITH city1, collect({score: score, person1Id: person1.id, person2Id: person2.id})[0] AS top
RETURN
top.person1Id,
top.person2Id,
city1.name,
top.score
ORDER BY
top.score DESC,
top.person1Id ASC,
top.person2Id ASC
LIMIT 100
""".replace(
"\n", ""
),
self._get_query_parameters(),
)
def benchmark__bi__query_17_analytical(self):
memgraph = (
"""
MATCH
(tag:Tag {name: $tag}),
(person1:Person)<-[:HAS_CREATOR]-(message1:Message)-[:REPLY_OF*0..]->(post1:Post)<-[:CONTAINER_OF]-(forum1:Forum),
(message1)-[:HAS_TAG]->(tag),
(forum1)<-[:HAS_MEMBER]->(person2:Person)<-[:HAS_CREATOR]-(comment:Comment)-[:HAS_TAG]->(tag),
(forum1)<-[:HAS_MEMBER]->(person3:Person)<-[:HAS_CREATOR]-(message2:Message),
(comment)-[:REPLY_OF]->(message2)-[:REPLY_OF*0..]->(post2:Post)<-[:CONTAINER_OF]-(forum2:Forum)
MATCH (comment)-[:HAS_TAG]->(tag)
MATCH (message2)-[:HAS_TAG]->(tag)
OPTIONAL MATCH (forum2)-[:HAS_MEMBER]->(person1)
WHERE forum1 <> forum2 AND message2.creationDate > message1.creationDate + duration({hours: $delta}) AND person1 IS NULL
RETURN person1, count(DISTINCT message2) AS messageCount
ORDER BY messageCount DESC, person1.id ASC
LIMIT 10
""".replace(
"\n", ""
),
self._get_query_parameters(),
)
neo4j = (
"""
MATCH
(tag:Tag {name: $tag}),
(person1:Person)<-[:HAS_CREATOR]-(message1:Message)-[:REPLY_OF*0..]->(post1:Post)<-[:CONTAINER_OF]-(forum1:Forum),
(message1)-[:HAS_TAG]->(tag),
(forum1)<-[:HAS_MEMBER]->(person2:Person)<-[:HAS_CREATOR]-(comment:Comment)-[:HAS_TAG]->(tag),
(forum1)<-[:HAS_MEMBER]->(person3:Person)<-[:HAS_CREATOR]-(message2:Message),
(comment)-[:REPLY_OF]->(message2)-[:REPLY_OF*0..]->(post2:Post)<-[:CONTAINER_OF]-(forum2:Forum)
MATCH (comment)-[:HAS_TAG]->(tag)
MATCH (message2)-[:HAS_TAG]->(tag)
WHERE forum1 <> forum2
AND message2.creationDate > message1.creationDate + duration({hours: $delta})
AND NOT (forum2)-[:HAS_MEMBER]->(person1)
RETURN person1, count(DISTINCT message2) AS messageCount
ORDER BY messageCount DESC, person1.id ASC
LIMIT 10
""".replace(
"\n", ""
),
self._get_query_parameters(),
)
if self._vendor == "memgraph":
return memgraph
else:
return neo4j
def benchmark__bi__query_18_analytical(self):
memgraph = (
"""
MATCH (tag:Tag {name: $tag})<-[:HAS_INTEREST]-(person1:Person)-[:KNOWS]-(mutualFriend:Person)-[:KNOWS]-(person2:Person)-[:HAS_INTEREST]->(tag)
OPTIONAL MATCH (person1)-[:KNOWS]-(person2)
WHERE person1 <> person2
RETURN person1.id AS person1Id, person2.id AS person2Id, count(DISTINCT mutualFriend) AS mutualFriendCount
ORDER BY mutualFriendCount DESC, person1Id ASC, person2Id ASC
LIMIT 20
""".replace(
"\n", ""
),
self._get_query_parameters(),
)
neo4j = (
"""
MATCH (tag:Tag {name: $tag})<-[:HAS_INTEREST]-(person1:Person)-[:KNOWS]-(mutualFriend:Person)-[:KNOWS]-(person2:Person)-[:HAS_INTEREST]->(tag)
WHERE person1 <> person2
AND NOT (person1)-[:KNOWS]-(person2)
RETURN person1.id AS person1Id, person2.id AS person2Id, count(DISTINCT mutualFriend) AS mutualFriendCount
ORDER BY mutualFriendCount DESC, person1Id ASC, person2Id ASC
LIMIT 20
""".replace(
"\n", ""
),
self._get_query_parameters(),
)
if self._vendor == "memgraph":
return memgraph
else:
return neo4j

View File

@ -0,0 +1,684 @@
import inspect
import random
from datetime import datetime
from pathlib import Path
import helpers
from benchmark_context import BenchmarkContext
from workloads.base import Workload
from workloads.importers.importer_ldbc_interactive import *
class LDBC_Interactive(Workload):
NAME = "ldbc_interactive"
VARIANTS = ["sf0.1", "sf1", "sf3", "sf10"]
DEFAULT_VARIANT = "sf1"
URL_FILE = {
"sf0.1": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/ldbc/benchmark/interactive/ldbc_interactive_sf0.1.cypher.gz",
"sf1": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/ldbc/benchmark/interactive/ldbc_interactive_sf1.cypher.gz",
"sf3": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/ldbc/benchmark/interactive/ldbc_interactive_sf3.cypher.gz",
"sf10": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/ldbc/benchmark/interactive/ldbc_interactive_sf10.cypher.gz",
}
URL_CSV = {
"sf0.1": "https://repository.surfsara.nl/datasets/cwi/snb/files/social_network-csv_basic/social_network-csv_basic-sf0.1.tar.zst",
"sf1": "https://repository.surfsara.nl/datasets/cwi/snb/files/social_network-csv_basic/social_network-csv_basic-sf1.tar.zst",
"sf3": "https://repository.surfsara.nl/datasets/cwi/snb/files/social_network-csv_basic/social_network-csv_basic-sf3.tar.zst",
"sf10": "https://repository.surfsara.nl/datasets/cwi/snb/files/social_network-csv_basic/social_network-csv_basic-sf10.tar.zst",
}
SIZES = {
"sf0.1": {"vertices": 327588, "edges": 1477965},
"sf1": {"vertices": 3181724, "edges": 17256038},
"sf3": {"vertices": 1, "edges": 1},
"sf10": {"vertices": 1, "edges": 1},
}
URL_INDEX_FILE = {
"memgraph": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/ldbc/benchmark/interactive/memgraph_interactive_index.cypher",
"neo4j": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/ldbc/benchmark/interactive/neo4j_interactive_index.cypher",
}
PROPERTIES_ON_EDGES = True
QUERY_PARAMETERS = {
"sf0.1": "https://repository.surfsara.nl/datasets/cwi/snb/files/substitution_parameters/substitution_parameters-sf0.1.tar.zst",
"sf1": "https://repository.surfsara.nl/datasets/cwi/snb/files/substitution_parameters/substitution_parameters-sf0.1.tar.zst",
"sf3": "https://repository.surfsara.nl/datasets/cwi/snb/files/substitution_parameters/substitution_parameters-sf0.1.tar.zst",
}
def custom_import(self) -> bool:
importer = ImporterLDBCInteractive(
benchmark_context=self.benchmark_context,
dataset_name=self.NAME,
variant=self._variant,
index_file=self._file_index,
csv_dict=self.URL_CSV,
)
return importer.execute_import()
def _prepare_parameters_directory(self):
parameters = Path() / ".cache" / "datasets" / self.NAME / self._variant / "parameters"
parameters.mkdir(parents=True, exist_ok=True)
dir_name = self.QUERY_PARAMETERS[self._variant].split("/")[-1:][0].removesuffix(".tar.zst")
if (parameters / dir_name).exists():
print("Files downloaded:")
parameters = parameters / dir_name
else:
print("Downloading files")
downloaded_file = helpers.download_file(self.QUERY_PARAMETERS[self._variant], parameters.absolute())
print("Unpacking the file..." + downloaded_file)
parameters = helpers.unpack_tar_zst(Path(downloaded_file))
return parameters
def _get_query_parameters(self) -> dict:
func_name = inspect.stack()[1].function
parameters = {}
for file in self._parameters_dir.glob("interactive_*.txt"):
if file.name.split("_")[1] == func_name.split("_")[-2]:
with file.open("r") as input:
lines = input.readlines()
position = random.randint(1, len(lines) - 1)
header = lines[0].strip("\n").split("|")
data = lines[position].strip("\n").split("|")
for i in range(len(header)):
if "Date" in header[i]:
time = int(data[i]) / 1000
converted = datetime.utcfromtimestamp(time).strftime("%Y-%m-%dT%H:%M:%S")
parameters[header[i]] = converted
elif data[i].isdigit():
parameters[header[i]] = int(data[i])
else:
parameters[header[i]] = data[i]
return parameters
def __init__(self, variant: str = None, benchmark_context: BenchmarkContext = None):
super().__init__(variant, benchmark_context=benchmark_context)
self._parameters_dir = self._prepare_parameters_directory()
self.benchmark_context = benchmark_context
def benchmark__interactive__complex_query_1_analytical(self):
memgraph = (
"""
MATCH (p:Person {id: $personId}), (friend:Person {firstName: $firstName})
WHERE NOT p=friend
WITH p, friend
MATCH path =((p)-[:KNOWS *BFS 1..3]-(friend))
WITH min(size(path)) AS distance, friend
ORDER BY
distance ASC,
friend.lastName ASC,
toInteger(friend.id) ASC
LIMIT 20
MATCH (friend)-[:IS_LOCATED_IN]->(friendCity:City)
OPTIONAL MATCH (friend)-[studyAt:STUDY_AT]->(uni:University)-[:IS_LOCATED_IN]->(uniCity:City)
WITH friend, collect(
CASE uni.name
WHEN null THEN null
ELSE [uni.name, studyAt.classYear, uniCity.name]
END ) AS unis, friendCity, distance
OPTIONAL MATCH (friend)-[workAt:WORK_AT]->(company:Company)-[:IS_LOCATED_IN]->(companyCountry:Country)
WITH friend, collect(
CASE company.name
WHEN null THEN null
ELSE [company.name, workAt.workFrom, companyCountry.name]
END ) AS companies, unis, friendCity, distance
RETURN
friend.id AS friendId,
friend.lastName AS friendLastName,
distance AS distanceFromPerson,
friend.birthday AS friendBirthday,
friend.gender AS friendGender,
friend.browserUsed AS friendBrowserUsed,
friend.locationIP AS friendLocationIp,
friend.email AS friendEmails,
friend.speaks AS friendLanguages,
friendCity.name AS friendCityName,
unis AS friendUniversities,
companies AS friendCompanies
ORDER BY
distanceFromPerson ASC,
friendLastName ASC,
toInteger(friendId) ASC
LIMIT 20
""".replace(
"\n", ""
),
self._get_query_parameters(),
)
neo4j = (
"""
MATCH (p:Person {id: $personId}), (friend:Person {firstName: $firstName})
WHERE NOT p=friend
WITH p, friend
MATCH path = shortestPath((p)-[:KNOWS*1..3]-(friend))
WITH min(length(path)) AS distance, friend
ORDER BY
distance ASC,
friend.lastName ASC,
toInteger(friend.id) ASC
LIMIT 20
MATCH (friend)-[:IS_LOCATED_IN]->(friendCity:City)
OPTIONAL MATCH (friend)-[studyAt:STUDY_AT]->(uni:University)-[:IS_LOCATED_IN]->(uniCity:City)
WITH friend, collect(
CASE uni.name
WHEN null THEN null
ELSE [uni.name, studyAt.classYear, uniCity.name]
END ) AS unis, friendCity, distance
OPTIONAL MATCH (friend)-[workAt:WORK_AT]->(company:Company)-[:IS_LOCATED_IN]->(companyCountry:Country)
WITH friend, collect(
CASE company.name
WHEN null THEN null
ELSE [company.name, workAt.workFrom, companyCountry.name]
END ) AS companies, unis, friendCity, distance
RETURN
friend.id AS friendId,
friend.lastName AS friendLastName,
distance AS distanceFromPerson,
friend.birthday AS friendBirthday,
friend.gender AS friendGender,
friend.browserUsed AS friendBrowserUsed,
friend.locationIP AS friendLocationIp,
friend.email AS friendEmails,
friend.speaks AS friendLanguages,
friendCity.name AS friendCityName,
unis AS friendUniversities,
companies AS friendCompanies
ORDER BY
distanceFromPerson ASC,
friendLastName ASC,
toInteger(friendId) ASC
LIMIT 20
""".replace(
"\n", ""
),
self._get_query_parameters(),
)
if self._vendor == "memgraph":
return memgraph
else:
return neo4j
def benchmark__interactive__complex_query_2_analytical(self):
return (
"""
MATCH (:Person {id: $personId })-[:KNOWS]-(friend:Person)<-[:HAS_CREATOR]-(message:Message)
WHERE message.creationDate <= localDateTime($maxDate)
RETURN
friend.id AS personId,
friend.firstName AS personFirstName,
friend.lastName AS personLastName,
message.id AS postOrCommentId,
coalesce(message.content,message.imageFile) AS postOrCommentContent,
message.creationDate AS postOrCommentCreationDate
ORDER BY
postOrCommentCreationDate DESC,
toInteger(postOrCommentId) ASC
LIMIT 20
""".replace(
"\n", ""
),
self._get_query_parameters(),
)
def benchmark__interactive__complex_query_3_analytical(self):
memgraph = (
"""
MATCH (countryX:Country {name: $countryXName }),
(countryY:Country {name: $countryYName }),
(person:Person {id: $personId })
WITH person, countryX, countryY
LIMIT 1
MATCH (city:City)-[:IS_PART_OF]->(country:Country)
WHERE country IN [countryX, countryY]
WITH person, countryX, countryY, collect(city) AS cities
MATCH (person)-[:KNOWS*1..2]-(friend)-[:IS_LOCATED_IN]->(city)
WHERE NOT person=friend AND NOT city IN cities
WITH DISTINCT friend, countryX, countryY
MATCH (friend)<-[:HAS_CREATOR]-(message),
(message)-[:IS_LOCATED_IN]->(country)
WHERE localDateTime($startDate) + duration({day:$durationDays}) > message.creationDate >= localDateTime($startDate) AND
country IN [countryX, countryY]
WITH friend,
CASE WHEN country=countryX THEN 1 ELSE 0 END AS messageX,
CASE WHEN country=countryY THEN 1 ELSE 0 END AS messageY
WITH friend, sum(messageX) AS xCount, sum(messageY) AS yCount
WHERE xCount>0 AND yCount>0
RETURN friend.id AS friendId,
friend.firstName AS friendFirstName,
friend.lastName AS friendLastName,
xCount,
yCount,
xCount + yCount AS xyCount
ORDER BY xyCount DESC, friendId ASC
LIMIT 20
""".replace(
"\n", ""
),
self._get_query_parameters(),
)
neo4j = (
"""
MATCH (countryX:Country {name: $countryXName }),
(countryY:Country {name: $countryYName }),
(person:Person {id: $personId })
WITH person, countryX, countryY
LIMIT 1
MATCH (city:City)-[:IS_PART_OF]->(country:Country)
WHERE country IN [countryX, countryY]
WITH person, countryX, countryY, collect(city) AS cities
MATCH (person)-[:KNOWS*1..2]-(friend)-[:IS_LOCATED_IN]->(city)
WHERE NOT person=friend AND NOT city IN cities
WITH DISTINCT friend, countryX, countryY
MATCH (friend)<-[:HAS_CREATOR]-(message),
(message)-[:IS_LOCATED_IN]->(country)
WHERE localDateTime($startDate) + duration({days:$durationDays}) > message.creationDate >= localDateTime($startDate) AND
country IN [countryX, countryY]
WITH friend,
CASE WHEN country=countryX THEN 1 ELSE 0 END AS messageX,
CASE WHEN country=countryY THEN 1 ELSE 0 END AS messageY
WITH friend, sum(messageX) AS xCount, sum(messageY) AS yCount
WHERE xCount>0 AND yCount>0
RETURN friend.id AS friendId,
friend.firstName AS friendFirstName,
friend.lastName AS friendLastName,
xCount,
yCount,
xCount + yCount AS xyCount
ORDER BY xyCount DESC, friendId ASC
LIMIT 20
""".replace(
"\n", ""
),
self._get_query_parameters(),
)
if self._vendor == "memgraph":
return memgraph
else:
return neo4j
def benchmark__interactive__complex_query_4_analytical(self):
memgraph = (
"""
MATCH (person:Person {id: $personId })-[:KNOWS]-(friend:Person),
(friend)<-[:HAS_CREATOR]-(post:Post)-[:HAS_TAG]->(tag)
WITH DISTINCT tag, post
WITH tag,
CASE
WHEN localDateTime($startDate) + duration({day:$durationDays}) > post.creationDate >= localDateTime($startDate) THEN 1
ELSE 0
END AS valid,
CASE
WHEN localDateTime($startDate) > post.creationDate THEN 1
ELSE 0
END AS inValid
WITH tag, sum(valid) AS postCount, sum(inValid) AS inValidPostCount
WHERE postCount>0 AND inValidPostCount=0
RETURN tag.name AS tagName, postCount
ORDER BY postCount DESC, tagName ASC
LIMIT 10
""",
self._get_query_parameters(),
)
neo4j = (
"""
MATCH (person:Person {id: $personId })-[:KNOWS]-(friend:Person),
(friend)<-[:HAS_CREATOR]-(post:Post)-[:HAS_TAG]->(tag)
WITH DISTINCT tag, post
WITH tag,
CASE
WHEN localDateTime($startDate) + duration({days:$durationDays}) > post.creationDate >= localDateTime($startDate) THEN 1
ELSE 0
END AS valid,
CASE
WHEN localDateTime($startDate) > post.creationDate THEN 1
ELSE 0
END AS inValid
WITH tag, sum(valid) AS postCount, sum(inValid) AS inValidPostCount
WHERE postCount>0 AND inValidPostCount=0
RETURN tag.name AS tagName, postCount
ORDER BY postCount DESC, tagName ASC
LIMIT 10
""",
self._get_query_parameters(),
)
if self._vendor == "memgraph":
return memgraph
else:
return neo4j
def benchmark__interactive__complex_query_5_analytical(self):
return (
"""
MATCH (person:Person { id: $personId })-[:KNOWS*1..2]-(friend)
WHERE
NOT person=friend
WITH DISTINCT friend
MATCH (friend)<-[membership:HAS_MEMBER]-(forum)
WHERE
membership.joinDate > localDateTime($minDate)
WITH
forum,
collect(friend) AS friends
OPTIONAL MATCH (friend)<-[:HAS_CREATOR]-(post)<-[:CONTAINER_OF]-(forum)
WHERE
friend IN friends
WITH
forum,
count(post) AS postCount
RETURN
forum.title AS forumName,
postCount
ORDER BY
postCount DESC,
forum.id ASC
LIMIT 20
""".replace(
"\n", ""
),
self._get_query_parameters(),
)
def benchmark__interactive__complex_query_6_analytical(self):
return (
"""
MATCH (knownTag:Tag { name: $tagName })
WITH knownTag.id as knownTagId
MATCH (person:Person { id: $personId })-[:KNOWS*1..2]-(friend)
WHERE NOT person=friend
WITH
knownTagId,
collect(distinct friend) as friends
UNWIND friends as f
MATCH (f)<-[:HAS_CREATOR]-(post:Post),
(post)-[:HAS_TAG]->(t:Tag{id: knownTagId}),
(post)-[:HAS_TAG]->(tag:Tag)
WHERE NOT t = tag
WITH
tag.name as tagName,
count(post) as postCount
RETURN
tagName,
postCount
ORDER BY
postCount DESC,
tagName ASC
LIMIT 10
""".replace(
"\n", ""
),
self._get_query_parameters(),
)
def benchmark__interactive__complex_query_7_analytical(self):
memgraph = (
"""
MATCH (person:Person {id: $personId})<-[:HAS_CREATOR]-(message:Message)<-[like:LIKES]-(liker:Person)
WITH liker, message, like.creationDate AS likeTime, person
ORDER BY likeTime DESC, toInteger(message.id) ASC
WITH liker, head(collect({msg: message, likeTime: likeTime})) AS latestLike, person
OPTIONAL MATCH (liker)-[:KNOWS]-(person)
WITH liker, latestLike, person,
CASE WHEN person IS null THEN TRUE ELSE FALSE END AS isNew
RETURN
liker.id AS personId,
liker.firstName AS personFirstName,
liker.lastName AS personLastName,
latestLike.likeTime AS likeCreationDate,
latestLike.msg.id AS commentOrPostId,
coalesce(latestLike.msg.content, latestLike.msg.imageFile) AS commentOrPostContent,
(latestLike.likeTime - latestLike.msg.creationDate).minute AS minutesLatency
ORDER BY
likeCreationDate DESC,
toInteger(personId) ASC
LIMIT 20
""".replace(
"\n", ""
),
self._get_query_parameters(),
)
neo4j = (
"""
MATCH (person:Person {id: $personId})<-[:HAS_CREATOR]-(message:Message)<-[like:LIKES]-(liker:Person)
WITH liker, message, like.creationDate AS likeTime, person
ORDER BY likeTime DESC, toInteger(message.id) ASC
WITH liker, head(collect({msg: message, likeTime: likeTime})) AS latestLike, person
RETURN
liker.id AS personId,
liker.firstName AS personFirstName,
liker.lastName AS personLastName,
latestLike.likeTime AS likeCreationDate,
latestLike.msg.id AS commentOrPostId,
coalesce(latestLike.msg.content, latestLike.msg.imageFile) AS commentOrPostContent,
duration.between(latestLike.likeTime, latestLike.msg.creationDate).minutes AS minutesLatency,
not((liker)-[:KNOWS]-(person)) AS isNew
ORDER BY
likeCreationDate DESC,
toInteger(personId) ASC
LIMIT 20
""".replace(
"\n", ""
),
self._get_query_parameters(),
)
if self._vendor == "memgraph":
return memgraph
else:
return neo4j
def benchmark__interactive__complex_query_8_analytical(self):
return (
"""
MATCH (start:Person {id: $personId})<-[:HAS_CREATOR]-(:Message)<-[:REPLY_OF]-(comment:Comment)-[:HAS_CREATOR]->(person:Person)
RETURN
person.id AS personId,
person.firstName AS personFirstName,
person.lastName AS personLastName,
comment.creationDate AS commentCreationDate,
comment.id AS commentId,
comment.content AS commentContent
ORDER BY
commentCreationDate DESC,
commentId ASC
LIMIT 20
""".replace(
"\n", ""
),
self._get_query_parameters(),
)
def benchmark__interactive__complex_query_9_analytical(self):
return (
"""
MATCH (root:Person {id: $personId })-[:KNOWS*1..2]-(friend:Person)
WHERE NOT friend = root
WITH collect(distinct friend) as friends
UNWIND friends as friend
MATCH (friend)<-[:HAS_CREATOR]-(message:Message)
WHERE message.creationDate < localDateTime($maxDate)
RETURN
friend.id AS personId,
friend.firstName AS personFirstName,
friend.lastName AS personLastName,
message.id AS commentOrPostId,
coalesce(message.content,message.imageFile) AS commentOrPostContent,
message.creationDate AS commentOrPostCreationDate
ORDER BY
commentOrPostCreationDate DESC,
message.id ASC
LIMIT 20
""".replace(
"\n", ""
),
self._get_query_parameters(),
)
def benchmark__interactive__complex_query_10_analytical(self):
memgraph = (
"""
MATCH (person:Person {id: $personId})-[:KNOWS*2..2]-(friend),
(friend)-[:IS_LOCATED_IN]->(city:City)
WHERE NOT friend=person AND
NOT (friend)-[:KNOWS]-(person)
WITH person, city, friend, datetime({epochMillis: friend.birthday}) as birthday
WHERE (birthday.month=$month AND birthday.day>=21) OR
(birthday.month=($month%12)+1 AND birthday.day<22)
WITH DISTINCT friend, city, person
OPTIONAL MATCH (friend)<-[:HAS_CREATOR]-(post:Post)
WITH friend, city, collect(post) AS posts, person
WITH friend,
city,
size(posts) AS postCount,
size([p IN posts WHERE (p)-[:HAS_TAG]->()<-[:HAS_INTEREST]-(person)]) AS commonPostCount
RETURN friend.id AS personId,
friend.firstName AS personFirstName,
friend.lastName AS personLastName,
commonPostCount - (postCount - commonPostCount) AS commonInterestScore,
friend.gender AS personGender,
city.name AS personCityName
ORDER BY commonInterestScore DESC, personId ASC
LIMIT 10
""".replace(
"\n", ""
),
self._get_query_parameters(),
)
neo4j = (
"""
MATCH (person:Person {id: $personId})-[:KNOWS*2..2]-(friend),
(friend)-[:IS_LOCATED_IN]->(city:City)
WHERE NOT friend=person AND
NOT (friend)-[:KNOWS]-(person)
WITH person, city, friend, datetime({epochMillis: friend.birthday}) as birthday
WHERE (birthday.month=$month AND birthday.day>=21) OR
(birthday.month=($month%12)+1 AND birthday.day<22)
WITH DISTINCT friend, city, person
OPTIONAL MATCH (friend)<-[:HAS_CREATOR]-(post:Post)
WITH friend, city, collect(post) AS posts, person
WITH friend,
city,
size(posts) AS postCount,
size([p IN posts WHERE (p)-[:HAS_TAG]->()<-[:HAS_INTEREST]-(person)]) AS commonPostCount
RETURN friend.id AS personId,
friend.firstName AS personFirstName,
friend.lastName AS personLastName,
commonPostCount - (postCount - commonPostCount) AS commonInterestScore,
friend.gender AS personGender,
city.name AS personCityName
ORDER BY commonInterestScore DESC, personId ASC
LIMIT 10
""".replace(
"\n", ""
),
self._get_query_parameters(),
)
if self._vendor == "memgraph":
return memgraph
else:
return neo4j
def benchmark__interactive__complex_query_11_analytical(self):
return (
"""
MATCH (person:Person {id: $personId })-[:KNOWS*1..2]-(friend:Person)
WHERE not(person=friend)
WITH DISTINCT friend
MATCH (friend)-[workAt:WORK_AT]->(company:Company)-[:IS_LOCATED_IN]->(:Country {name: $countryName })
WHERE workAt.workFrom < $workFromYear
RETURN
friend.id AS personId,
friend.firstName AS personFirstName,
friend.lastName AS personLastName,
company.name AS organizationName,
workAt.workFrom AS organizationWorkFromYear
ORDER BY
organizationWorkFromYear ASC,
toInteger(personId) ASC,
organizationName DESC
LIMIT 10
""".replace(
"\n", ""
),
self._get_query_parameters(),
)
def benchmark__interactive__complex_query_12_analytical(self):
return (
"""
MATCH (tag:Tag)-[:HAS_TYPE|IS_SUBCLASS_OF*0..]->(baseTagClass:TagClass)
WHERE tag.name = $tagClassName OR baseTagClass.name = $tagClassName
WITH collect(tag.id) as tags
MATCH (:Person {id: $personId })-[:KNOWS]-(friend:Person)<-[:HAS_CREATOR]-(comment:Comment)-[:REPLY_OF]->(:Post)-[:HAS_TAG]->(tag:Tag)
WHERE tag.id in tags
RETURN
friend.id AS personId,
friend.firstName AS personFirstName,
friend.lastName AS personLastName,
collect(DISTINCT tag.name) AS tagNames,
count(DISTINCT comment) AS replyCount
ORDER BY
replyCount DESC,
toInteger(personId) ASC
LIMIT 20
""".replace(
"\n", ""
),
self._get_query_parameters(),
)
def benchmark__interactive__complex_query_13_analytical(self):
memgraph = (
"""
MATCH
(person1:Person {id: $person1Id}),
(person2:Person {id: $person2Id}),
path = (person1)-[:KNOWS *BFS]-(person2)
RETURN
CASE path IS NULL
WHEN true THEN -1
ELSE size(path)
END AS shortestPathLength
""".replace(
"\n", ""
),
self._get_query_parameters(),
)
neo4j = (
"""
MATCH
(person1:Person {id: $person1Id}),
(person2:Person {id: $person2Id}),
path = shortestPath((person1)-[:KNOWS*]-(person2))
RETURN
CASE path IS NULL
WHEN true THEN -1
ELSE length(path)
END AS shortestPathLength
""".replace(
"\n", ""
),
self._get_query_parameters(),
)
if self._vendor == "memgraph":
return memgraph
else:
return neo4j
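# The two texts above differ only in how the shortest path is expressed: the Memgraph
# variant uses the built-in breadth-first expansion (-[:KNOWS *BFS]-) with size(path),
# while the Neo4j variant uses shortestPath() with length(path); both report -1 when no
# path between the two persons exists.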

View File

@ -1,134 +1,17 @@
# Copyright 2022 Memgraph Ltd.
#
# Use of this software is governed by the Business Source License
# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
# License, and you may not use this file except in compliance with the Business Source License.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0, included in the file
# licenses/APL.txt.
import random
import helpers
from benchmark_context import BenchmarkContext
from workloads.base import Workload
from workloads.importers.importer_pokec import ImporterPokec
# Base dataset class used as a template to create each individual dataset. All
# common logic is handled here.
class Dataset:
# Name of the dataset.
NAME = "Base dataset"
# List of all variants of the dataset that exist.
VARIANTS = ["default"]
# One of the available variants that should be used as the default variant.
DEFAULT_VARIANT = "default"
# List of query files that should be used to import the dataset.
FILES = {
"default": "/foo/bar",
}
INDEX = None
INDEX_FILES = {"default": ""}
# List of query file URLs that should be used to import the dataset.
URLS = None
# Number of vertices/edges for each variant.
SIZES = {
"default": {"vertices": 0, "edges": 0},
}
# Indicates whether the dataset has properties on edges.
PROPERTIES_ON_EDGES = False
def __init__(self, variant=None, vendor=None):
"""
Accepts a `variant` variable that indicates which variant
of the dataset should be executed.
"""
if variant is None:
variant = self.DEFAULT_VARIANT
if variant not in self.VARIANTS:
raise ValueError("Invalid test variant!")
if (self.FILES and variant not in self.FILES) and (self.URLS and variant not in self.URLS):
raise ValueError("The variant doesn't have a defined URL or " "file path!")
if variant not in self.SIZES:
raise ValueError("The variant doesn't have a defined dataset " "size!")
if vendor not in self.INDEX_FILES:
raise ValueError("Vendor does not have INDEX for dataset!")
self._variant = variant
self._vendor = vendor
if self.FILES is not None:
self._file = self.FILES.get(variant, None)
else:
self._file = None
if self.URLS is not None:
self._url = self.URLS.get(variant, None)
else:
self._url = None
if self.INDEX_FILES is not None:
self._index = self.INDEX_FILES.get(vendor, None)
else:
self._index = None
self._size = self.SIZES[variant]
if "vertices" not in self._size or "edges" not in self._size:
raise ValueError("The size defined for this variant doesn't " "have the number of vertices and/or edges!")
self._num_vertices = self._size["vertices"]
self._num_edges = self._size["edges"]
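# Resolves the dataset and index files for the selected variant/vendor: a locally
# configured dataset file is used directly, otherwise the files are downloaded once and
# cached in the benchmark directory for subsequent runs.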
def prepare(self, directory):
if self._file is not None:
print("Using dataset file:", self._file)
else:
# TODO: add support for JSON datasets
cached_input, exists = directory.get_file("dataset.cypher")
if not exists:
print("Downloading dataset file:", self._url)
downloaded_file = helpers.download_file(self._url, directory.get_path())
print("Unpacking and caching file:", downloaded_file)
helpers.unpack_and_move_file(downloaded_file, cached_input)
print("Using cached dataset file:", cached_input)
self._file = cached_input
cached_index, exists = directory.get_file(self._vendor + ".cypher")
if not exists:
print("Downloading index file:", self._index)
downloaded_file = helpers.download_file(self._index, directory.get_path())
print("Unpacking and caching file:", downloaded_file)
helpers.unpack_and_move_file(downloaded_file, cached_index)
print("Using cached index file:", cached_index)
self._index = cached_index
def get_variant(self):
"""Returns the current variant of the dataset."""
return self._variant
def get_index(self):
"""Get index file, defined by vendor"""
return self._index
def get_file(self):
"""
Returns path to the file that contains dataset creation queries.
"""
return self._file
def get_size(self):
"""Returns number of vertices/edges for the current variant."""
return self._size
# All tests should be query generator functions that output all of the
# queries that should be executed by the runner. The functions should be
# named `benchmark__GROUPNAME__TESTNAME` and should not accept any
# arguments.
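# A minimal, hypothetical sketch (not code from this repository) of how a runner could
# discover such generator functions purely from the naming convention described above;
# `workload` is assumed to be an instance of one of the dataset/workload classes.
def _collect_query_generators(workload):
    for name in dir(workload):
        if not name.startswith("benchmark__"):
            continue
        # "benchmark__GROUPNAME__TESTNAME" -> ("GROUPNAME", "TESTNAME")
        _, group, test = name.split("__", 2)
        yield group, test, getattr(workload, name)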
class Pokec(Dataset):
class Pokec(Workload):
NAME = "pokec"
VARIANTS = ["small", "medium", "large"]
DEFAULT_VARIANT = "small"
FILES = None
FILE = None
URLS = {
URL_FILE = {
"small": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/pokec/benchmark/pokec_small_import.cypher",
"medium": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/pokec/benchmark/pokec_medium_import.cypher",
"large": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/pokec/benchmark/pokec_large.setup.cypher.gz",
@ -138,16 +21,28 @@ class Pokec(Dataset):
"medium": {"vertices": 100000, "edges": 1768515},
"large": {"vertices": 1632803, "edges": 30622564},
}
INDEX = None
INDEX_FILES = {
URL_INDEX_FILE = {
"memgraph": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/pokec/benchmark/memgraph.cypher",
"neo4j": "https://s3.eu-west-1.amazonaws.com/deps.memgraph.io/dataset/pokec/benchmark/neo4j.cypher",
}
PROPERTIES_ON_EDGES = False
# Helpers used to generate the queries
def __init__(self, variant: str = None, benchmark_context: BenchmarkContext = None):
super().__init__(variant, benchmark_context=benchmark_context)
def custom_import(self) -> bool:
importer = ImporterPokec(
benchmark_context=self.benchmark_context,
dataset_name=self.NAME,
index_file=self._file_index,
dataset_file=self._file,
variant=self._variant,
)
return importer.execute_import()
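# Delegates loading of the selected Pokec variant to ImporterPokec and reports whether
# that custom import path was taken (an assumption based on the bool return type; the
# calling runner logic is not shown in this hunk).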
# Helpers used to generate the queries
def _get_random_vertex(self):
# All vertices in the Pokec dataset have an ID in the range
# [1, _num_vertices].
@ -343,7 +238,7 @@ class Pokec(Dataset):
return ("MATCH (n:User {id: $id}) RETURN n", {"id": self._get_random_vertex()})
def benchmark__match__vertex_on_property(self):
return ("MATCH (n {id: $id}) RETURN n", {"id": self._get_random_vertex()})
return ("MATCH (n:User {id: $id}) RETURN n", {"id": self._get_random_vertex()})
def benchmark__update__vertex_on_property(self):
return (
@ -364,7 +259,7 @@ class Pokec(Dataset):
def benchmark__basic__single_vertex_property_update_update(self):
return (
"MATCH (n {id: $id}) SET n.property = -1",
"MATCH (n:User {id: $id}) SET n.property = -1",
{"id": self._get_random_vertex()},
)
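# A minimal, illustrative sketch of executing one generated (query, params) pair over
# Bolt with the official `neo4j` Python driver. The URI, the absence of authentication
# and the driver choice are assumptions for illustration only; mgbench itself runs these
# queries through its own benchmark client binary.
def _run_single_query_example(workload, uri="bolt://localhost:7687"):
    from neo4j import GraphDatabase

    query, params = workload.benchmark__basic__single_vertex_property_update_update()
    driver = GraphDatabase.driver(uri, auth=None)
    try:
        with driver.session() as session:
            # Execute the query with its generated parameters and drain the result.
            return session.run(query, params).consume()
    finally:
        driver.close()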