From 8b834c702cee2c9a20c8366c436aa0953dbfd5e1 Mon Sep 17 00:00:00 2001
From: Ante Javor
Date: Sat, 14 Jan 2023 16:11:49 +0100
Subject: [PATCH] Update mgbench to run Diff workflow under 30mins (#730)

---
 tests/mgbench/benchmark.py   | 65 +++++++++++++++++++++---------------
 tests/mgbench/graph_bench.py |  2 ++
 2 files changed, 41 insertions(+), 26 deletions(-)

diff --git a/tests/mgbench/benchmark.py b/tests/mgbench/benchmark.py
index 743f4e92c..f08d6e3fa 100755
--- a/tests/mgbench/benchmark.py
+++ b/tests/mgbench/benchmark.py
@@ -142,7 +142,7 @@ parser.add_argument(
     with the presence of 300 write queries from write type or 30%""",
 )
 
-parser.add_argument("--tail-latency", type=int, default=100, help="Number of queries for the tail latency statistics")
+parser.add_argument("--tail-latency", type=int, default=0, help="Number of queries for the tail latency statistics")
 
 parser.add_argument(
     "--performance-tracking",
@@ -223,8 +223,17 @@ def filter_benchmarks(generators, patterns):
                         patterns,
                     ):
                         current[group].append((query_name, query_func))
-            if len(current) > 0:
-                filtered.append((generator(variant, args.vendor_name), dict(current)))
+            if len(current) == 0:
+                continue
+
+            # Ignore benchgraph "basic" queries in standard CI/CD run
+            for pattern in patterns:
+                res = pattern.count("*")
+                key = "basic"
+                if res >= 2 and key in current.keys():
+                    current.pop(key)
+
+            filtered.append((generator(variant, args.vendor_name), dict(current)))
     return filtered
 
 
@@ -241,30 +250,34 @@ def warmup(client):
 
 
 def tail_latency(vendor, client, func):
-    vendor.start_benchmark("tail_latency")
-    if args.warmup_run:
-        warmup(client)
-    latency = []
     iteration = args.tail_latency
-    query_list = get_queries(func, iteration)
-    for i in range(0, iteration):
-        ret = client.execute(queries=[query_list[i]], num_workers=1)
-        latency.append(ret[0]["duration"])
-    latency.sort()
-    query_stats = {
-        "iterations": iteration,
-        "min": latency[0],
-        "max": latency[iteration - 1],
-        "mean": statistics.mean(latency),
-        "p99": latency[math.floor(iteration * 0.99) - 1],
-        "p95": latency[math.floor(iteration * 0.95) - 1],
-        "p90": latency[math.floor(iteration * 0.90) - 1],
-        "p75": latency[math.floor(iteration * 0.75) - 1],
-        "p50": latency[math.floor(iteration * 0.50) - 1],
-    }
-    print("Query statistics for tail latency: ")
-    print(query_stats)
-    vendor.stop("tail_latency")
+    if iteration >= 10:
+        vendor.start_benchmark("tail_latency")
+        if args.warmup_run:
+            warmup(client)
+        latency = []
+
+        query_list = get_queries(func, iteration)
+        for i in range(0, iteration):
+            ret = client.execute(queries=[query_list[i]], num_workers=1)
+            latency.append(ret[0]["duration"])
+        latency.sort()
+        query_stats = {
+            "iterations": iteration,
+            "min": latency[0],
+            "max": latency[iteration - 1],
+            "mean": statistics.mean(latency),
+            "p99": latency[math.floor(iteration * 0.99) - 1],
+            "p95": latency[math.floor(iteration * 0.95) - 1],
+            "p90": latency[math.floor(iteration * 0.90) - 1],
+            "p75": latency[math.floor(iteration * 0.75) - 1],
+            "p50": latency[math.floor(iteration * 0.50) - 1],
+        }
+        print("Query statistics for tail latency: ")
+        print(query_stats)
+        vendor.stop("tail_latency")
+    else:
+        query_stats = {}
 
     return query_stats
 
diff --git a/tests/mgbench/graph_bench.py b/tests/mgbench/graph_bench.py
index 8173a4a54..d1a633081 100644
--- a/tests/mgbench/graph_bench.py
+++ b/tests/mgbench/graph_bench.py
@@ -147,6 +147,8 @@ def run_full_benchmarks(vendor, binary, dataset_size, dataset_group, realistic,
         "12",
         "--no-authorization",
         "pokec/" + dataset_size + "/" + dataset_group + "/*",
+        "--tail-latency",
+        "100",
     ]
 
     for config in configurations:
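
For reviewers who want to sanity-check the statistics block, below is a minimal standalone sketch of the percentile indexing that tail_latency uses; the sample durations and variable values are illustrative, and only math, statistics, and the floor(n * p) - 1 convention come from the patch. It also shows why the new iteration >= 10 guard matters: with fewer than ten samples, floor(n * p) - 1 can collapse several percentiles onto the same index or even go negative.

import math
import statistics

# Illustrative, pre-sorted durations; mgbench collects these from
# client.execute() results and sorts them before indexing.
latency = sorted([11.2, 12.0, 12.8, 13.3, 14.9, 15.5, 16.0, 18.2, 25.4, 30.1])
iteration = len(latency)  # plays the role of args.tail_latency

query_stats = {
    "iterations": iteration,
    "min": latency[0],
    "max": latency[iteration - 1],
    "mean": statistics.mean(latency),
    # Each percentile takes the sorted sample at index floor(n * p) - 1;
    # with n = 10, p99 maps to index 8 and p50 to index 4. For n < 10 the
    # indices degenerate (e.g. n = 1 gives floor(0.5) - 1 = -1 for p50),
    # which is what the iteration >= 10 guard in the patch avoids.
    "p99": latency[math.floor(iteration * 0.99) - 1],
    "p50": latency[math.floor(iteration * 0.50) - 1],
}
print(query_stats)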