# Copyright 2022 Memgraph Ltd.
#
# Use of this software is governed by the Business Source License
# included in the file licenses/BSL.txt; by using this file, you agree to be bound by the terms of the Business Source
# License, and you may not use this file except in compliance with the Business Source License.
#
# As of the Change Date specified in that file, in accordance with
# the Business Source License, use of this software will be governed
# by the Apache License, Version 2.0, included in the file
# licenses/APL.txt.

####################################
#  Benchmark datastructures analyzer
####################################
# This scripts uses the output from dataset benchmark tests to plot charts
# comparing the results of different datastructures on the same operation.
#
# Note: Naming the tests is very important in order for this script to recognize
# which operation is being performed and on which DS, so it should come in this
# form: BM_Benchmark<Operation><Datastructure>/<RunArgument>
# where run_argument will be added automatically by google benchmark framework

import argparse
import json
import sys
from dataclasses import dataclass
from enum import Enum
from pathlib import Path
from typing import Any, Dict, List, Optional

import matplotlib.pyplot as plt


class Operation(Enum):
    CONTAINS = "contains"
    FIND = "find"
    INSERT = "insert"
    RANDOM = "random"
    REMOVE = "remove"

    @classmethod
    def to_list(cls) -> List[str]:
        return list(map(lambda c: c.value, cls))

    @staticmethod
    def get(s: str) -> Optional["Operation"]:
        try:
            return Operation[s.upper()]
        except ValueError:
            return None

    def __str__(self):
        return str(self.value)


@dataclass(frozen=True)
class BenchmarkRow:
    name: str
    datastructure: str
    operation: Operation
    real_time: int
    cpu_time: int
    iterations: int
    time_unit: str
    run_arg: Optional[Any]


class GoogleBenchmarkResult:
    def __init__(self):
        self._operation = None
        self._datastructures: Dict[str, List[BenchmarkRow]] = dict()

    def add_result(self, row: BenchmarkRow) -> None:
        if self._operation is None:
            self._operation = row.operation
        assert self._operation is row.operation
        if row.datastructure not in self._datastructures:
            self._datastructures[row.datastructure] = [row]
        else:
            self._datastructures[row.datastructure].append(row)

    @property
    def operation(self) -> Optional[Operation]:
        return self._operation

    @property
    def datastructures(self) -> Dict[str, List[BenchmarkRow]]:
        return self._datastructures


def get_operation(s: str) -> Operation:
    for op in Operation.to_list():
        if op.lower() in s.lower():
            operation_enum = Operation.get(op)
            if operation_enum is not None:
                return operation_enum
            else:
                print("Operation not found!")
                sys.exit(1)
    print("Operation not found!")
    sys.exit(1)


def get_row_data(line: Dict[str, Any]) -> BenchmarkRow:
    """
    Naming is very important, first must come an Operation name, and then a data
    structure to test.
    """
    full_name = line["name"].split("BM_Benchmark")[1]
    name_with_run_arg = full_name.split("/")
    operation = get_operation(name_with_run_arg[0])
    datastructure = name_with_run_arg[0].split(operation.value.capitalize())[1]

    run_arg = None
    if len(name_with_run_arg) > 1:
        run_arg = name_with_run_arg[1]

    return BenchmarkRow(
        name_with_run_arg[0],
        datastructure,
        operation,
        line["real_time"],
        line["cpu_time"],
        line["iterations"],
        line["time_unit"],
        run_arg,
    )


def get_benchmark_res(args) -> Optional[GoogleBenchmarkResult]:
    file_path = Path(args.log_file)
    if not file_path.exists():
        print("Error file {file_path} not found!")
        return None
    with file_path.open("r") as file:
        data = json.load(file)
        res = GoogleBenchmarkResult()
        assert "benchmarks" in data, "There must be a benchmark list inside"
        for benchmark in data["benchmarks"]:
            res.add_result(get_row_data(benchmark))
        return res


def plot_operation(results: GoogleBenchmarkResult, save: bool) -> None:
    colors = ["red", "green", "blue", "yellow", "purple", "brown"]
    assert results.operation is not None
    fig = plt.figure()
    for ds, benchmarks in results.datastructures.items():
        if benchmarks:
            # Print line chart
            x_axis = [elem.real_time for elem in benchmarks]
            y_axis = [elem.run_arg for elem in benchmarks]
            plt.plot(x_axis, y_axis, marker="", color=colors.pop(0), linewidth="2", label=f"{ds}")
            plt.title(f"Benchmark results for operation {results.operation.value}")
            plt.xlabel(f"Time [{benchmarks[0].time_unit}]")
            plt.grid(True)
            plt.legend()
            plt.draw()
        else:
            print(f"Nothing to do for {ds}...")
    if save:
        plt.savefig(f"{results.operation.value}.png")
        plt.close(fig)
    else:
        plt.show()


def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(description="Process benchmark results.")
    parser.add_argument("--log_file", type=str)
    parser.add_argument("--save", type=bool, default=True)
    return parser.parse_args()


def main():
    args = parse_args()
    res = get_benchmark_res(args)
    if res is None:
        print("Failed to get results from log file!")
        sys.exit(1)
    plot_operation(res, args.save)


if __name__ == "__main__":
    main()