Real-world like graph generator

This commit is contained in:
Marin Tomic 2018-03-28 13:53:00 +02:00
parent 86072a993a
commit c0210c8d55
2 changed files with 153 additions and 0 deletions

View File

@ -0,0 +1,62 @@
#include <memory>
#include <random>
#include <shared_mutex>
#include <vector>
#include "gflags/gflags.h"
#include "long_running_common.hpp"
#include "stats/stats.hpp"
#include "stats/stats_rpc_messages.hpp"
#include "threading/sync/rwlock.hpp"
/**
 * Test client that repeatedly runs a BFS query from a randomly chosen start
 * node.
 *
 * On construction it asks the database how many `:Node` vertices exist. Each
 * Step() then draws random ids in [0, num_nodes - 1] until it finds a node
 * with at least one outgoing edge to a different node, and runs a BFS
 * expansion from it.
 *
 * NOTE(review): this presumes `:Node` ids are exactly 0..num_nodes-1 -- confirm
 * against the snapshot generator used to build the dataset.
 */
class Graph500BfsClient : public TestClient {
 public:
  /**
   * @param id value used to seed this client's random number generator.
   */
  explicit Graph500BfsClient(int id) : TestClient(), rg_(id) {
    auto result = Execute("MATCH (n:Node) RETURN count(1)", {}, "NumNodes");
    CHECK(result) << "Read-only query should not fail";
    num_nodes_ = result->records[0][0].ValueInt();
    // uniform_int_distribution(a, b) requires a <= b; an empty graph would
    // make Step() construct the distribution over [0, -1].
    CHECK(num_nodes_ > 0) << "Graph must contain at least one :Node vertex";
  }

 private:
  std::mt19937 rg_;
  // Kept as 64-bit: ValueInt() is stored without narrowing and matches the
  // distribution's result type below.
  int64_t num_nodes_{0};

  /**
   * Picks a random start node with non-zero out-degree and runs one BFS query
   * from it.
   *
   * The selection loop has no retry limit: if no node has an outgoing edge to
   * another node it never terminates.
   */
  void Step() override {
    std::uniform_int_distribution<int64_t> dist(0, num_nodes_ - 1);
    int64_t start = -1;
    while (true) {
      start = dist(rg_);
      auto result = Execute(
          "MATCH (n:Node {id: $id})-->(m) WHERE m != n "
          "RETURN count(m) AS degree",
          {{"id", start}}, "GetDegree");
      CHECK(result) << "Read-only query should not fail";
      if (result->records[0][0].ValueInt() > 0) {
        break;
      }
    }
    auto result =
        Execute("MATCH path = (n:Node {id: $id})-[*bfs]->() RETURN count(1)",
                {{"id", start}}, "Bfs");
    CHECK(result) << "Read-only query should not fail!";
  }
};
/**
 * Entry point of the BFS benchmark client.
 *
 * Parses command-line flags, initializes logging, constructs
 * FLAGS_num_workers Graph500BfsClient instances (each receiving its index as
 * the constructor argument), hands them to RunMultithreadedTest and finally
 * stops stats logging.
 */
int main(int argc, char **argv) {
  gflags::ParseCommandLineFlags(&argc, &argv, true);
  google::InitGoogleLogging(argv[0]);

  std::vector<std::unique_ptr<TestClient>> clients;
  int worker_id = 0;
  while (worker_id < FLAGS_num_workers) {
    clients.emplace_back(std::make_unique<Graph500BfsClient>(worker_id));
    ++worker_id;
  }

  RunMultithreadedTest(clients);

  stats::StopStatsLogging();
  return 0;
}

View File

@ -0,0 +1,91 @@
#include <algorithm>
#include <cstdint>
#include <fstream>
#include <numeric>
#include <random>
#include <utility>
#include <vector>

#include "gflags/gflags.h"
#include "glog/logging.h"
#include "json/json.hpp"
#include "snapshot_generation/snapshot_writer.hpp"
// Command-line flags of the generator.

// Number of workers; nodes are assigned to workers round-robin by creation
// index (see main).
DEFINE_int32(num_workers, 1,
"Number of distributed workers (including master)");
// Directory handed to WriteToSnapshot as the output location.
DEFINE_string(dir, "tmp",
"Directory for storing workers durability directories.");
// Path of the JSON file describing the graph to generate (format below).
DEFINE_string(config, "", "Path to config JSON file");
/**
 * Config file should be defined as follows:
 * {
 * "scale": 10,
 * "edge_factor": 16,
 * "probabilities": [0.57, 0.19, 0.19]
 * }
 *
 * - "scale": the graph gets 2^scale nodes.
 * - "edge_factor": the graph gets edge_factor * 2^scale edges.
 * - "probabilities": the three values A, B and C used when choosing each bit
 *   of an edge's endpoints; the fourth value is derived as D = 1 - (A + B + C).
 */
// Brings GraphState and WriteToSnapshot into scope.
using namespace snapshot_generation;
int main(int argc, char *argv[]) {
google::ParseCommandLineFlags(&argc, &argv, true);
google::InitGoogleLogging(argv[0]);
nlohmann::json config;
{
std::ifstream config_file(FLAGS_config);
config_file >> config;
}
int64_t N = 1LL << config["scale"].get<int>();
int64_t M = config["edge_factor"].get<double>() * N;
const double A = config["probabilities"][0];
const double B = config["probabilities"][1];
const double C = config["probabilities"][2];
const double D = 1 - (A + B + C);
std::vector<std::pair<int64_t, int64_t>> edges(M);
std::mt19937_64 gen(std::random_device{}());
std::uniform_real_distribution<double> dist(0, 1);
for (int i = 0; i < M; ++i) {
for (int j = 0; j < config["scale"]; ++j) {
if (dist(gen) > A + B) {
edges[i].first |= 1 << j;
if (dist(gen) > C / (C + D)) {
edges[i].second |= 1 << j;
}
} else {
if (dist(gen) > A / (A + B)) {
edges[i].second |= 1 << j;
}
}
}
}
std::vector<int64_t> vertex_labels(N);
std::iota(vertex_labels.begin(), vertex_labels.end(), 0);
std::random_shuffle(vertex_labels.begin(), vertex_labels.end());
GraphState state(FLAGS_num_workers);
state.CreateIndex("Node", "id");
std::vector<gid::Gid> vertices;
vertices.reserve(N);
for (int i = 0; i < N; ++i) {
vertices.emplace_back(state.CreateNode(i % FLAGS_num_workers, {"Node"},
{{"id", vertex_labels[i]}}));
}
std::random_shuffle(edges.begin(), edges.end());
for (int i = 0; i < M; ++i) {
auto e = edges[i];
VLOG(1) << vertex_labels[e.first] << " " << vertex_labels[e.second];
state.CreateEdge(vertices[e.first], vertices[e.second], "Edge", {});
}
LOG(INFO) << fmt::format("nodes = {}, edges = {}", N, M);
WriteToSnapshot(state, FLAGS_dir);
return 0;
}