Add bfs benchmark on pokec dataset
Reviewers: ipaljak, mferencevic Reviewed By: ipaljak, mferencevic Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D1533
This commit is contained in:
parent
cd3210fb9b
commit
158f97206d
@ -78,11 +78,12 @@ class QueryClient:
|
|||||||
|
|
||||||
|
|
||||||
class LongRunningClient:
|
class LongRunningClient:
|
||||||
def __init__(self, args, default_num_workers):
|
def __init__(self, args, default_num_workers, workload):
|
||||||
self.log = logging.getLogger("LongRunningClient")
|
self.log = logging.getLogger("LongRunningClient")
|
||||||
self.client = jail.get_process()
|
self.client = jail.get_process()
|
||||||
set_cpus("client-cpu-ids", self.client, args)
|
set_cpus("client-cpu-ids", self.client, args)
|
||||||
self.default_num_workers = default_num_workers
|
self.default_num_workers = default_num_workers
|
||||||
|
self.workload = workload
|
||||||
|
|
||||||
# TODO: This is quite similar to __call__ method of QueryClient. Remove
|
# TODO: This is quite similar to __call__ method of QueryClient. Remove
|
||||||
# duplication.
|
# duplication.
|
||||||
@ -114,7 +115,9 @@ class LongRunningClient:
|
|||||||
client_args = ["--port", database.args.port,
|
client_args = ["--port", database.args.port,
|
||||||
"--num-workers", str(num_workers),
|
"--num-workers", str(num_workers),
|
||||||
"--output", output,
|
"--output", output,
|
||||||
"--duration", str(duration)]
|
"--duration", str(duration),
|
||||||
|
"--db", database.name,
|
||||||
|
"--scenario", self.workload]
|
||||||
|
|
||||||
return_code = self.client.run_and_wait(
|
return_code = self.client.run_and_wait(
|
||||||
client, client_args, timeout=600, stdin=config_path)
|
client, client_args, timeout=600, stdin=config_path)
|
||||||
|
122
tests/macro_benchmark/clients/bfs_pokec_client.cpp
Normal file
122
tests/macro_benchmark/clients/bfs_pokec_client.cpp
Normal file
@ -0,0 +1,122 @@
|
|||||||
|
#include <array>
|
||||||
|
#include <chrono>
|
||||||
|
#include <fstream>
|
||||||
|
#include <iostream>
|
||||||
|
#include <queue>
|
||||||
|
#include <random>
|
||||||
|
#include <sstream>
|
||||||
|
#include <unordered_map>
|
||||||
|
#include <unordered_set>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include <gflags/gflags.h>
|
||||||
|
#include <glog/logging.h>
|
||||||
|
#include <json/json.hpp>
|
||||||
|
|
||||||
|
#include "io/network/utils.hpp"
|
||||||
|
#include "utils/algorithm.hpp"
|
||||||
|
#include "utils/timer.hpp"
|
||||||
|
|
||||||
|
#include "long_running_common.hpp"
|
||||||
|
|
||||||
|
using communication::bolt::Edge;
|
||||||
|
using communication::bolt::Value;
|
||||||
|
using communication::bolt::Vertex;
|
||||||
|
|
||||||
|
class BfsPokecClient : public TestClient {
|
||||||
|
public:
|
||||||
|
BfsPokecClient(int id, const std::string &db)
|
||||||
|
: TestClient(), rg_(id), db_(db) {
|
||||||
|
auto result = Execute("MATCH (n:User) RETURN count(1)", {}, "NumNodes");
|
||||||
|
CHECK(result) << "Read-only query should not fail";
|
||||||
|
num_nodes_ = result->records[0][0].ValueInt();
|
||||||
|
}
|
||||||
|
|
||||||
|
private:
|
||||||
|
std::mt19937 rg_;
|
||||||
|
std::string db_;
|
||||||
|
int num_nodes_;
|
||||||
|
|
||||||
|
int RandomId() {
|
||||||
|
std::uniform_int_distribution<int64_t> dist(1, num_nodes_);
|
||||||
|
auto id = dist(rg_);
|
||||||
|
return id;
|
||||||
|
}
|
||||||
|
|
||||||
|
void BfsWithDestinationNode() {
|
||||||
|
auto start = RandomId();
|
||||||
|
auto end = RandomId();
|
||||||
|
while (start == end) {
|
||||||
|
end = RandomId();
|
||||||
|
}
|
||||||
|
if (FLAGS_db == "memgraph") {
|
||||||
|
auto result = Execute(
|
||||||
|
"MATCH p = (n:User {id: $start})-[*bfs..15]->(m:User {id: $end}) "
|
||||||
|
"RETURN nodes(p) AS path LIMIT 1",
|
||||||
|
{{"start", start}, {"end", end}}, "Bfs");
|
||||||
|
CHECK(result) << "Read-only query should not fail!";
|
||||||
|
} else if (FLAGS_db == "neo4j") {
|
||||||
|
auto result = Execute(
|
||||||
|
"MATCH p = shortestPath("
|
||||||
|
"(n:User {id: $start})-[*..15]->(m:User {id: $end}))"
|
||||||
|
"RETURN [x in nodes(p) | x.id] AS path;",
|
||||||
|
{{"start", start}, {"end", end}}, "Bfs");
|
||||||
|
CHECK(result) << "Read-only query should not fail!";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
void BfsWithoutDestinationNode() {
|
||||||
|
auto start = RandomId();
|
||||||
|
if (FLAGS_db == "memgraph") {
|
||||||
|
auto result = Execute(
|
||||||
|
"MATCH p = (n:User {id: $start})-[*bfs..15]->(m:User) WHERE m != n "
|
||||||
|
"RETURN nodes(p) AS path",
|
||||||
|
{{"start", start}}, "Bfs");
|
||||||
|
CHECK(result) << "Read-only query should not fail!";
|
||||||
|
} else {
|
||||||
|
auto result = Execute(
|
||||||
|
"MATCH p = shortestPath("
|
||||||
|
"(n:User {id: $start})-[*..15]->(m:User)) WHERE m <> n "
|
||||||
|
"RETURN [x in nodes(p) | x.id] AS path;",
|
||||||
|
{{"start", start}}, "Bfs");
|
||||||
|
CHECK(result) << "Read-only query should not fail!";
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
public:
|
||||||
|
virtual void Step() override {
|
||||||
|
if (FLAGS_scenario == "with_destination_node") {
|
||||||
|
BfsWithDestinationNode();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (FLAGS_scenario == "without_destination_node") {
|
||||||
|
BfsWithoutDestinationNode();
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
LOG(FATAL) << "Should not get here: unknown scenario!";
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
|
int main(int argc, char **argv) {
|
||||||
|
gflags::ParseCommandLineFlags(&argc, &argv, true);
|
||||||
|
google::InitGoogleLogging(argv[0]);
|
||||||
|
|
||||||
|
communication::Init();
|
||||||
|
|
||||||
|
Endpoint endpoint(FLAGS_address, FLAGS_port);
|
||||||
|
ClientContext context(FLAGS_use_ssl);
|
||||||
|
Client client(&context);
|
||||||
|
if (!client.Connect(endpoint, FLAGS_username, FLAGS_password)) {
|
||||||
|
LOG(FATAL) << "Couldn't connect to " << endpoint;
|
||||||
|
}
|
||||||
|
|
||||||
|
std::vector<std::unique_ptr<TestClient>> clients;
|
||||||
|
for (auto i = 0; i < FLAGS_num_workers; ++i) {
|
||||||
|
clients.emplace_back(std::make_unique<BfsPokecClient>(i, "memgraph"));
|
||||||
|
}
|
||||||
|
|
||||||
|
RunMultithreadedTest(clients);
|
||||||
|
return 0;
|
||||||
|
}
|
@ -22,6 +22,7 @@
|
|||||||
|
|
||||||
const int MAX_RETRIES = 30;
|
const int MAX_RETRIES = 30;
|
||||||
|
|
||||||
|
DEFINE_string(db, "", "Database queries are executed on.");
|
||||||
DEFINE_string(address, "127.0.0.1", "Server address");
|
DEFINE_string(address, "127.0.0.1", "Server address");
|
||||||
DEFINE_int32(port, 7687, "Server port");
|
DEFINE_int32(port, 7687, "Server port");
|
||||||
DEFINE_int32(num_workers, 1, "Number of workers");
|
DEFINE_int32(num_workers, 1, "Number of workers");
|
||||||
@ -35,6 +36,7 @@ DEFINE_string(group, "unknown", "Test group name");
|
|||||||
DEFINE_string(scenario, "unknown", "Test scenario name");
|
DEFINE_string(scenario, "unknown", "Test scenario name");
|
||||||
|
|
||||||
auto &executed_queries = stats::GetCounter("executed_queries");
|
auto &executed_queries = stats::GetCounter("executed_queries");
|
||||||
|
auto &executed_steps = stats::GetCounter("executed_steps");
|
||||||
auto &serialization_errors = stats::GetCounter("serialization_errors");
|
auto &serialization_errors = stats::GetCounter("serialization_errors");
|
||||||
|
|
||||||
class TestClient {
|
class TestClient {
|
||||||
@ -59,6 +61,7 @@ class TestClient {
|
|||||||
runner_thread_ = std::thread([&] {
|
runner_thread_ = std::thread([&] {
|
||||||
while (keep_running_) {
|
while (keep_running_) {
|
||||||
Step();
|
Step();
|
||||||
|
executed_steps.Bump();
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@ -185,6 +188,7 @@ void RunMultithreadedTest(std::vector<std::unique_ptr<TestClient>> &clients) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
out << "{\"num_executed_queries\": " << executed_queries.Value() << ", "
|
out << "{\"num_executed_queries\": " << executed_queries.Value() << ", "
|
||||||
|
<< "\"num_executed_steps\": " << executed_steps.Value() << ", "
|
||||||
<< "\"elapsed_time\": " << timer.Elapsed().count()
|
<< "\"elapsed_time\": " << timer.Elapsed().count()
|
||||||
<< ", \"queries\": [";
|
<< ", \"queries\": [";
|
||||||
utils::PrintIterable(
|
utils::PrintIterable(
|
||||||
|
@ -40,6 +40,7 @@ class Memgraph:
|
|||||||
self.config = config
|
self.config = config
|
||||||
self.num_workers = num_workers
|
self.num_workers = num_workers
|
||||||
self.database_bin = jail.get_process()
|
self.database_bin = jail.get_process()
|
||||||
|
self.name = "memgraph"
|
||||||
set_cpus("database-cpu-ids", self.database_bin, args)
|
set_cpus("database-cpu-ids", self.database_bin, args)
|
||||||
|
|
||||||
def start(self):
|
def start(self):
|
||||||
@ -90,6 +91,7 @@ class Neo:
|
|||||||
self.args, _ = argp.parse_known_args(args)
|
self.args, _ = argp.parse_known_args(args)
|
||||||
self.config = config
|
self.config = config
|
||||||
self.database_bin = jail.get_process()
|
self.database_bin = jail.get_process()
|
||||||
|
self.name = "neo4j"
|
||||||
set_cpus("database-cpu-ids", self.database_bin, args)
|
set_cpus("database-cpu-ids", self.database_bin, args)
|
||||||
|
|
||||||
def start(self):
|
def start(self):
|
||||||
|
4
tests/macro_benchmark/groups/bfs_pokec/config.json
Normal file
4
tests/macro_benchmark/groups/bfs_pokec/config.json
Normal file
@ -0,0 +1,4 @@
|
|||||||
|
{
|
||||||
|
"duration": 30,
|
||||||
|
"client": "bfs_pokec_client"
|
||||||
|
}
|
6
tests/macro_benchmark/groups/bfs_pokec/download_dataset
Executable file
6
tests/macro_benchmark/groups/bfs_pokec/download_dataset
Executable file
@ -0,0 +1,6 @@
|
|||||||
|
#!/bin/bash -e
|
||||||
|
|
||||||
|
working_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
||||||
|
cd ${working_dir}
|
||||||
|
|
||||||
|
wget -nv -O pokec_small.setup.cypher http://deps.memgraph.io/pokec_small.setup.cypher
|
1
tests/macro_benchmark/groups/bfs_pokec/run.json
Normal file
1
tests/macro_benchmark/groups/bfs_pokec/run.json
Normal file
@ -0,0 +1 @@
|
|||||||
|
{}
|
@ -34,34 +34,38 @@ class LongRunningSuite:
|
|||||||
runner.stop()
|
runner.stop()
|
||||||
|
|
||||||
measurements = []
|
measurements = []
|
||||||
summary_format = "{:>15} {:>22}\n"
|
summary_format = "{:>15} {:>22} {:>22}\n"
|
||||||
self.summary = summary_format.format(
|
self.summary = summary_format.format(
|
||||||
"elapsed_time", "num_executed_queries")
|
"elapsed_time", "num_executed_queries", "num_executed_steps")
|
||||||
for result in results:
|
for result in results:
|
||||||
self.summary += summary_format.format(
|
self.summary += summary_format.format(
|
||||||
result["elapsed_time"], result["num_executed_queries"])
|
result["elapsed_time"], result["num_executed_queries"],
|
||||||
|
result["num_executed_steps"])
|
||||||
measurements.append({
|
measurements.append({
|
||||||
"target": "throughput",
|
"target": "throughput",
|
||||||
"time": result["elapsed_time"],
|
"time": result["elapsed_time"],
|
||||||
"value": result["num_executed_queries"],
|
"value": result["num_executed_queries"],
|
||||||
|
"steps": result["num_executed_steps"],
|
||||||
"unit": "number of executed queries",
|
"unit": "number of executed queries",
|
||||||
"type": "throughput"})
|
"type": "throughput"})
|
||||||
self.summary += "\n\nThroughtput: " + str(measurements[-1]["value"])
|
self.summary += "\n\nThroughput: " + str(measurements[-1]["value"])
|
||||||
|
self.summary += "\nExecuted steps: " + str(measurements[-1]["steps"])
|
||||||
return measurements
|
return measurements
|
||||||
|
|
||||||
def runners(self):
|
def runners(self):
|
||||||
return {"MemgraphRunner": MemgraphRunner, "NeoRunner": NeoRunner}
|
return {"MemgraphRunner": MemgraphRunner, "NeoRunner": NeoRunner}
|
||||||
|
|
||||||
def groups(self):
|
def groups(self):
|
||||||
return ["pokec", "card_fraud"]
|
return ["pokec", "card_fraud", "bfs_pokec"]
|
||||||
|
|
||||||
|
|
||||||
class _LongRunningRunner:
|
class _LongRunningRunner:
|
||||||
def __init__(self, args, database, num_client_workers):
|
def __init__(self, args, database, num_client_workers, workload):
|
||||||
self.log = logging.getLogger("_LongRunningRunner")
|
self.log = logging.getLogger("_LongRunningRunner")
|
||||||
self.database = database
|
self.database = database
|
||||||
self.query_client = QueryClient(args, num_client_workers)
|
self.query_client = QueryClient(args, num_client_workers)
|
||||||
self.long_running_client = LongRunningClient(args, num_client_workers)
|
self.long_running_client = LongRunningClient(args, num_client_workers,
|
||||||
|
workload)
|
||||||
|
|
||||||
def start(self):
|
def start(self):
|
||||||
self.database.start()
|
self.database.start()
|
||||||
@ -93,6 +97,9 @@ class MemgraphRunner(_LongRunningRunner):
|
|||||||
help="Number of workers")
|
help="Number of workers")
|
||||||
argp.add_argument("--num-client-workers", type=int, default=24,
|
argp.add_argument("--num-client-workers", type=int, default=24,
|
||||||
help="Number of clients")
|
help="Number of clients")
|
||||||
|
argp.add_argument("--workload", type=str, default="",
|
||||||
|
help="Type of client workload. Sets \
|
||||||
|
scenario flag for 'TestClient'")
|
||||||
self.args, remaining_args = argp.parse_known_args(args)
|
self.args, remaining_args = argp.parse_known_args(args)
|
||||||
assert not APOLLO or self.args.num_database_workers, \
|
assert not APOLLO or self.args.num_database_workers, \
|
||||||
"--num-database-workers is obligatory flag on apollo"
|
"--num-database-workers is obligatory flag on apollo"
|
||||||
@ -101,7 +108,8 @@ class MemgraphRunner(_LongRunningRunner):
|
|||||||
database = Memgraph(remaining_args, self.args.runner_config,
|
database = Memgraph(remaining_args, self.args.runner_config,
|
||||||
self.args.num_database_workers)
|
self.args.num_database_workers)
|
||||||
super(MemgraphRunner, self).__init__(
|
super(MemgraphRunner, self).__init__(
|
||||||
remaining_args, database, self.args.num_client_workers)
|
remaining_args, database, self.args.num_client_workers,
|
||||||
|
self.args.workload)
|
||||||
|
|
||||||
|
|
||||||
class NeoRunner(_LongRunningRunner):
|
class NeoRunner(_LongRunningRunner):
|
||||||
@ -115,9 +123,13 @@ class NeoRunner(_LongRunningRunner):
|
|||||||
help="Path to neo config file")
|
help="Path to neo config file")
|
||||||
argp.add_argument("--num-client-workers", type=int, default=24,
|
argp.add_argument("--num-client-workers", type=int, default=24,
|
||||||
help="Number of clients")
|
help="Number of clients")
|
||||||
|
argp.add_argument("--workload", type=str, default="",
|
||||||
|
help="Type of client workload. Sets \
|
||||||
|
scenario flag for 'TestClient'")
|
||||||
self.args, remaining_args = argp.parse_known_args(args)
|
self.args, remaining_args = argp.parse_known_args(args)
|
||||||
assert not APOLLO or self.args.num_client_workers, \
|
assert not APOLLO or self.args.num_client_workers, \
|
||||||
"--client-num-clients is obligatory flag on apollo"
|
"--client-num-clients is obligatory flag on apollo"
|
||||||
database = Neo(remaining_args, self.args.runner_config)
|
database = Neo(remaining_args, self.args.runner_config)
|
||||||
super(NeoRunner, self).__init__(
|
super(NeoRunner, self).__init__(
|
||||||
remaining_args, database, self.args.num_client_workers)
|
remaining_args, database, self.args.num_client_workers,
|
||||||
|
self.args.workload)
|
||||||
|
23
tests/macro_benchmark/run_bfs_pokec
Executable file
23
tests/macro_benchmark/run_bfs_pokec
Executable file
@ -0,0 +1,23 @@
|
|||||||
|
#!/bin/bash -e
|
||||||
|
|
||||||
|
script_dir="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
|
||||||
|
|
||||||
|
# Run bfs pokec bench (download dataset, run neo and memgraph, plot the results).
|
||||||
|
|
||||||
|
cd ${script_dir}
|
||||||
|
mkdir -p .results/bfs_pokec/
|
||||||
|
|
||||||
|
${script_dir}/groups/bfs_pokec/download_dataset
|
||||||
|
|
||||||
|
./harness LongRunningSuite MemgraphRunner --groups bfs_pokec --workload with_destination_node
|
||||||
|
mv .harness_summary ${script_dir}/.results/bfs_pokec/memgraph_bfs_1.summary
|
||||||
|
|
||||||
|
./harness LongRunningSuite NeoRunner --groups bfs_pokec --workload with_destination_node
|
||||||
|
mv .harness_summary ${script_dir}/.results/bfs_pokec/neo4j_bfs_1.summary
|
||||||
|
|
||||||
|
./harness LongRunningSuite MemgraphRunner --groups bfs_pokec --workload without_destination_node
|
||||||
|
mv .harness_summary ${script_dir}/.results/bfs_pokec/memgraph_bfs_2.summary
|
||||||
|
|
||||||
|
./harness LongRunningSuite NeoRunner --groups bfs_pokec --workload without_destination_node
|
||||||
|
mv .harness_summary ${script_dir}/.results/bfs_pokec/neo4j_bfs_2.summary
|
||||||
|
|
Loading…
Reference in New Issue
Block a user