From c0210c8d552e07b63c28358a5856a77b26af8daa Mon Sep 17 00:00:00 2001
From: Marin Tomic
Date: Wed, 28 Mar 2018 13:53:00 +0200
Subject: [PATCH] Real-world like graph generator

---
 .../macro_benchmark/clients/graph_500_bfs.cpp |  72 ++++++++++
 tests/manual/graph_500_generate_snapshot.cpp  | 124 ++++++++++++++++++
 2 files changed, 196 insertions(+)
 create mode 100644 tests/macro_benchmark/clients/graph_500_bfs.cpp
 create mode 100644 tests/manual/graph_500_generate_snapshot.cpp

diff --git a/tests/macro_benchmark/clients/graph_500_bfs.cpp b/tests/macro_benchmark/clients/graph_500_bfs.cpp
new file mode 100644
index 000000000..68053a373
--- /dev/null
+++ b/tests/macro_benchmark/clients/graph_500_bfs.cpp
@@ -0,0 +1,72 @@
+#include <memory>
+#include <random>
+#include <vector>
+
+#include "gflags/gflags.h"
+
+#include "long_running_common.hpp"
+#include "stats/stats.hpp"
+#include "stats/stats_rpc_messages.hpp"
+#include "threading/sync/rwlock.hpp"
+
+/**
+ * Benchmark client: every step picks a random node that has at least one
+ * outgoing edge to another node and runs a BFS expansion from it.
+ */
+class Graph500BfsClient : public TestClient {
+ public:
+  /** Seeds the client's random generator with `id` and counts the nodes. */
+  explicit Graph500BfsClient(int id) : TestClient(), rg_(id) {
+    auto result = Execute("MATCH (n:Node) RETURN count(1)", {}, "NumNodes");
+    CHECK(result) << "Read-only query should not fail";
+    num_nodes_ = result->records[0][0].ValueInt();
+    // Step() samples ids from [0, num_nodes_ - 1]; with no nodes that range
+    // is invalid for std::uniform_int_distribution.
+    CHECK(num_nodes_ > 0) << "The graph must contain at least one node";
+  }
+
+ private:
+  std::mt19937 rg_;
+  int num_nodes_;
+
+  /**
+   * Draws random ids until the node with that id has a positive out-degree
+   * (edges back to itself are not counted), then runs the BFS query from it.
+   * Keeps retrying for as long as no such node is drawn.
+   */
+  void Step() override {
+    std::uniform_int_distribution<int> dist(0, num_nodes_ - 1);
+
+    int start = -1;
+    while (true) {
+      start = dist(rg_);
+      auto result = Execute(
+          "MATCH (n:Node {id: $id})-->(m) WHERE m != n "
+          "RETURN count(m) AS degree",
+          {{"id", start}}, "GetDegree");
+      CHECK(result) << "Read-only query should not fail";
+      if (result->records[0][0].ValueInt() > 0) break;
+    }
+
+    auto result =
+        Execute("MATCH path = (n:Node {id: $id})-[*bfs]->() RETURN count(1)",
+                {{"id", start}}, "Bfs");
+    CHECK(result) << "Read-only query should not fail!";
+  }
+};
+
+int main(int argc, char **argv) {
+  gflags::ParseCommandLineFlags(&argc, &argv, true);
+  google::InitGoogleLogging(argv[0]);
+
+  std::vector<std::unique_ptr<TestClient>> clients;
+  for (int i = 0; i < FLAGS_num_workers; ++i) {
+    clients.emplace_back(std::make_unique<Graph500BfsClient>(i));
+  }
+
+  RunMultithreadedTest(clients);
+
+  stats::StopStatsLogging();
+
+  return 0;
+}
diff --git a/tests/manual/graph_500_generate_snapshot.cpp b/tests/manual/graph_500_generate_snapshot.cpp
new file mode 100644
index 000000000..326028b9f
--- /dev/null
+++ b/tests/manual/graph_500_generate_snapshot.cpp
@@ -0,0 +1,124 @@
+#include <algorithm>
+#include <cstdint>
+#include <fstream>
+#include <numeric>
+#include <random>
+#include <utility>
+#include <vector>
+
+#include "gflags/gflags.h"
+#include "glog/logging.h"
+#include "json/json.hpp"
+
+#include "snapshot_generation/snapshot_writer.hpp"
+
+DEFINE_int32(num_workers, 1,
+             "Number of distributed workers (including master)");
+DEFINE_string(dir, "tmp",
+              "Directory for storing workers durability directories.");
+DEFINE_string(config, "", "Path to config JSON file");
+
+/**
+ * Config file should be defined as follows:
+ * {
+ *   "scale": 10,
+ *   "edge_factor": 16,
+ *   "probabilities": [0.57, 0.19, 0.19]
+ * }
+ *
+ * The generated graph has 2^scale nodes and edge_factor * 2^scale edges.
+ * "probabilities" holds the quadrant probabilities A, B and C; the fourth
+ * one is D = 1 - (A + B + C).
+ */
+
+using namespace snapshot_generation;
+
+/**
+ * Reads the config, draws the edge list, creates the nodes and edges in a
+ * GraphState and writes it to FLAGS_dir as a snapshot.
+ */
+int main(int argc, char *argv[]) {
+  google::ParseCommandLineFlags(&argc, &argv, true);
+  google::InitGoogleLogging(argv[0]);
+
+  // Nodes are placed on worker i % num_workers, so it must not be zero.
+  CHECK(FLAGS_num_workers > 0) << "--num_workers must be positive";
+
+  nlohmann::json config;
+  {
+    std::ifstream config_file(FLAGS_config);
+    CHECK(config_file.is_open())
+        << "Unable to open config file: " << FLAGS_config;
+    config_file >> config;
+  }
+
+  const int scale = config["scale"].get<int>();
+  // Node indices are assembled bit by bit into an int64_t.
+  CHECK(scale >= 0 && scale < 63) << "scale must be in [0, 62]";
+  const int64_t edge_factor = config["edge_factor"].get<int64_t>();
+  CHECK(edge_factor >= 0) << "edge_factor must not be negative";
+
+  const int64_t N = int64_t{1} << scale;
+  const int64_t M = edge_factor * N;
+
+  const double A = config["probabilities"][0];
+  const double B = config["probabilities"][1];
+  const double C = config["probabilities"][2];
+  const double D = 1 - (A + B + C);
+
+  std::vector<std::pair<int64_t, int64_t>> edges(M);
+
+  std::mt19937_64 gen(std::random_device{}());
+  std::uniform_real_distribution<double> dist(0, 1);
+
+  // For every bit position pick a quadrant: A sets neither bit, B only the
+  // destination bit, C only the source bit and D both.
+  for (auto &edge : edges) {
+    for (int bit = 0; bit < scale; ++bit) {
+      // 64-bit mask: a plain `1 << bit` is an int shift and overflows for
+      // bit >= 31.
+      const int64_t mask = int64_t{1} << bit;
+      if (dist(gen) > A + B) {
+        edge.first |= mask;
+        if (dist(gen) > C / (C + D)) {
+          edge.second |= mask;
+        }
+      } else if (dist(gen) > A / (A + B)) {
+        edge.second |= mask;
+      }
+    }
+  }
+
+  // Node ids are a random permutation of 0 .. N - 1.
+  std::vector<int64_t> vertex_labels(N);
+  std::iota(vertex_labels.begin(), vertex_labels.end(), 0);
+  // std::random_shuffle was removed in C++17; std::shuffle reuses `gen`.
+  std::shuffle(vertex_labels.begin(), vertex_labels.end(), gen);
+
+  GraphState state(FLAGS_num_workers);
+
+  state.CreateIndex("Node", "id");
+
+  // The element type of `vertices` is whatever CreateNode returns.
+  const auto create_node = [&](int64_t index) {
+    return state.CreateNode(static_cast<int>(index % FLAGS_num_workers),
+                            {"Node"}, {{"id", vertex_labels[index]}});
+  };
+
+  std::vector<decltype(create_node(0))> vertices;
+  vertices.reserve(N);
+  for (int64_t i = 0; i < N; ++i) {
+    vertices.emplace_back(create_node(i));
+  }
+
+  std::shuffle(edges.begin(), edges.end(), gen);
+  for (const auto &edge : edges) {
+    VLOG(1) << vertex_labels[edge.first] << " " << vertex_labels[edge.second];
+    state.CreateEdge(vertices[edge.first], vertices[edge.second], "Edge", {});
+  }
+
+  LOG(INFO) << "nodes = " << N << ", edges = " << M;
+  WriteToSnapshot(state, FLAGS_dir);
+
+  return 0;
+}