// TODO: work in progress. #include #include #include #include #include #include #include #include #include #include #include #include #include "bolt_client.hpp" #include "common.hpp" #include "communication/bolt/client.hpp" #include "communication/bolt/v1/decoder/decoded_value.hpp" #include "io/network/network_endpoint.hpp" #include "io/network/socket.hpp" #include "threading/sync/spinlock.hpp" #include "utils/algorithm.hpp" #include "utils/algorithm.hpp" #include "utils/timer.hpp" using communication::bolt::DecodedValue; using communication::bolt::DecodedVertex; using communication::bolt::DecodedEdge; DEFINE_string(address, "127.0.0.1", "Server address"); DEFINE_string(port, "7687", "Server port"); DEFINE_int32(num_workers, 1, "Number of workers"); DEFINE_string(output, "", "Output file"); DEFINE_string(username, "", "Username for the database"); DEFINE_string(password, "", "Password for the database"); DEFINE_int32(duration, 30, "Number of seconds to execute benchmark"); const int MAX_RETRIES = 30; struct VertexAndEdges { DecodedVertex vertex; std::vector edges; std::vector vertices; }; std::atomic executed_queries; class Session { public: Session(const nlohmann::json &config, const std::string &address, const std::string &port, const std::string &username, const std::string &password) : config_(config), client_(address, port, username, password) {} private: const nlohmann::json &config_; BoltClient client_; std::unordered_map>> stats_; SpinLock lock_; auto Execute(const std::string &query, const std::map ¶ms, const std::string &query_name = "") { utils::Timer timer; auto result = ExecuteNTimesTillSuccess(client_, query, params, MAX_RETRIES); ++executed_queries; auto wall_time = timer.Elapsed(); auto metadata = result.metadata; metadata["wall_time"] = wall_time.count(); { std::unique_lock guard(lock_); if (query_name != "") { stats_[query_name].push_back(std::move(metadata)); } else { stats_[query].push_back(std::move(metadata)); } } return result; } auto MatchVertex(const std::string &label, int64_t id) { return Execute(fmt::format("MATCH (n :{} {{id : $id}}) RETURN n", label), {{"id", id}}); } auto MatchNeighbours(const std::string &label, int64_t id) { return Execute( fmt::format("MATCH (n :{} {{id : $id}})-[e]-(m) RETURN n, e, m", label), {{"id", id}}); } auto DetachDeleteVertex(const std::string &label, int64_t id) { return Execute( fmt::format("MATCH (n :{} {{id : $id}}) DETACH DELETE n", label), {{"id", id}}); } auto CreateVertex(const DecodedVertex &vertex) { std::stringstream os; os << "CREATE (n :"; utils::PrintIterable(os, vertex.labels, ":"); os << " {"; utils::PrintIterable( os, vertex.properties, ", ", [&](auto &stream, const auto &pair) { if (pair.second.type() == DecodedValue::Type::String) { stream << pair.first << ": \"" << pair.second << "\""; } else { stream << pair.first << ": " << pair.second; } }); os << "})"; return Execute(os.str(), {}, "CREATE (n :labels... {...})"); } auto CreateEdge(const DecodedVertex &from, const std::string &from_label, int64_t from_id, const std::string &to_label, int64_t to_id, const DecodedEdge &edge) { std::stringstream os; os << fmt::format("MATCH (n :{} {{id : {}}}) ", from_label, from_id); os << fmt::format("MATCH (m :{} {{id : {}}}) ", to_label, to_id); os << "CREATE (n)"; if (edge.to == from.id) { os << "<-"; } else { os << "-"; } os << "[:" << edge.type << " {"; utils::PrintIterable( os, edge.properties, ", ", [&](auto &stream, const auto &pair) { if (pair.second.type() == DecodedValue::Type::String) { stream << pair.first << ": \"" << pair.second << "\""; } else { stream << pair.first << ": " << pair.second; } }); os << "}]"; if (edge.from == from.id) { os << "->"; } else { os << "-"; } os << "(m) "; os << "RETURN n.id"; auto ret = Execute(os.str(), {}, "MATCH (n :label {id: ...}) MATCH (m :label {id: ...}) " "CREATE (n)-[:type ...]-(m)"); CHECK(ret.records.size() == 1U) << "from_id: " << from_id << " " << "to_id: " << to_id << " " << "ret.records.size(): " << ret.records.size(); return ret; } VertexAndEdges RetrieveAndDeleteVertex(const std::string &label, int64_t id) { auto vertex_record = MatchVertex(label, id).records; CHECK(vertex_record.size() == 1U) << "id: " << id << " " << "vertex_record.size(): " << vertex_record.size(); auto records = MatchNeighbours(label, id).records; DetachDeleteVertex(label, id); std::vector edges; edges.reserve(records.size()); for (const auto &record : records) { edges.push_back(record[1].ValueEdge()); } std::vector vertices; vertices.reserve(records.size()); for (const auto &record : records) { vertices.push_back(record[2].ValueVertex()); } return {vertex_record[0][0].ValueVertex(), edges, vertices}; } void ReturnVertexAndEdges(const VertexAndEdges &vertex_and_edges, const std::string &label) { int num_queries = 0; CreateVertex(vertex_and_edges.vertex); ++num_queries; for (int i = 0; i < static_cast(vertex_and_edges.vertices.size()); ++i) { auto records = CreateEdge( vertex_and_edges.vertex, label, vertex_and_edges.vertex.properties.at("id").ValueInt(), label, vertex_and_edges.vertices[i].properties.at("id").ValueInt(), vertex_and_edges.edges[i]) .records; CHECK(records.size() == 1U) << "Graph in invalid state " << vertex_and_edges.vertex.properties.at("id"); ++num_queries; } } public: void Run(int id, std::vector to_remove, std::atomic &keep_running) { std::mt19937 rg(id); std::vector removed; const auto &queries = config_["queries"]; const double read_probability = config_["read_probability"]; const std::string independent_label = config_["independent_label"]; while (keep_running) { std::uniform_real_distribution<> real_dist(0.0, 1.0); // Read query. if (real_dist(rg) < read_probability) { CHECK(queries.size()) << "Specify at least one read query or set read_probability to 0"; std::uniform_int_distribution<> read_query_dist(0, queries.size() - 1); const auto &query = queries[read_query_dist(rg)]; std::map params; for (const auto ¶m : query["params"]) { std::uniform_int_distribution param_value_dist( param["low"], param["high"]); params[param["name"]] = param_value_dist(rg); } Execute(query["query"], params); } else { auto remove_random = [&](auto &v) { CHECK(v.size()); std::uniform_int_distribution<> int_dist(0, v.size() - 1); std::swap(v.back(), v[int_dist(rg)]); auto ret = v.back(); v.pop_back(); return ret; }; if (real_dist(rg) < static_cast(removed.size()) / (removed.size() + to_remove.size())) { auto vertices_and_edges = remove_random(removed); ReturnVertexAndEdges(vertices_and_edges, independent_label); to_remove.push_back( vertices_and_edges.vertex.properties["id"].ValueInt()); } else { auto node_id = remove_random(to_remove); auto ret = RetrieveAndDeleteVertex(independent_label, node_id); removed.push_back(ret); } } } } auto ConsumeStats() { std::unique_lock guard(lock_); auto stats = stats_; stats_.clear(); return stats; } }; int64_t NumNodes(BoltClient &client, const std::string &label) { auto result = ExecuteNTimesTillSuccess( client, "MATCH (n :" + label + ") RETURN COUNT(n) as cnt", {}, MAX_RETRIES); return result.records[0][0].ValueInt(); } std::vector Neighbours(BoltClient &client, const std::string &label, int64_t id) { auto result = ExecuteNTimesTillSuccess( client, "MATCH (n :" + label + " {id: " + std::to_string(id) + "})-[e]-(m) RETURN m.id", {}, MAX_RETRIES); std::vector ret; for (const auto &record : result.records) { ret.push_back(record[0].ValueInt()); } return ret; } std::vector IndependentSet(BoltClient &client, const std::string &label) { const int64_t num_nodes = NumNodes(client, label); std::vector independent_nodes_ids; std::vector ids; std::unordered_set independent; for (int64_t i = 1; i <= num_nodes; ++i) { ids.push_back(i); independent.insert(i); } { std::mt19937 mt; std::shuffle(ids.begin(), ids.end(), mt); } for (auto i : ids) { if (independent.find(i) == independent.end()) continue; independent.erase(i); std::vector neighbour_ids = Neighbours(client, label, i); independent_nodes_ids.push_back(i); for (auto j : neighbour_ids) { independent.erase(j); } } LOG(INFO) << "Number of nodes nodes: " << num_nodes << "\n" << "Number of independent nodes: " << independent_nodes_ids.size(); return independent_nodes_ids; } int main(int argc, char **argv) { gflags::ParseCommandLineFlags(&argc, &argv, true); google::InitGoogleLogging(argv[0]); nlohmann::json config; std::cin >> config; const std::string independent_label = config["independent_label"]; auto independent_nodes_ids = [&] { BoltClient client(FLAGS_address, FLAGS_port, FLAGS_username, FLAGS_password); return IndependentSet(client, independent_label); }(); utils::Timer timer; std::vector threads; std::atomic keep_running{true}; int64_t next_to_assign = 0; std::vector> sessions; sessions.reserve(FLAGS_num_workers); for (int i = 0; i < FLAGS_num_workers; ++i) { int64_t size = independent_nodes_ids.size(); int64_t next_next_to_assign = next_to_assign + size / FLAGS_num_workers + (i < size % FLAGS_num_workers); sessions.push_back(std::make_unique( config, FLAGS_address, FLAGS_port, FLAGS_username, FLAGS_password)); std::vector to_remove( independent_nodes_ids.begin() + next_to_assign, independent_nodes_ids.begin() + next_next_to_assign); LOG(INFO) << next_to_assign << " " << next_next_to_assign; next_to_assign = next_next_to_assign; threads.emplace_back( [&](int thread_id, const std::vector to_remove) { sessions[thread_id]->Run(thread_id, std::move(to_remove), keep_running); }, i, std::move(to_remove)); } // Open stream for writing stats. std::streambuf *buf; std::ofstream f; if (FLAGS_output != "") { f.open(FLAGS_output); buf = f.rdbuf(); } else { buf = std::cout.rdbuf(); } std::ostream out(buf); while (timer.Elapsed().count() < FLAGS_duration) { std::unordered_map> aggregated_stats; using namespace std::chrono_literals; std::unordered_map>> stats; for (const auto &session : sessions) { auto session_stats = session->ConsumeStats(); for (const auto &session_query_stats : session_stats) { auto &query_stats = stats[session_query_stats.first]; query_stats.insert(query_stats.end(), session_query_stats.second.begin(), session_query_stats.second.end()); } } // TODO: Here we combine pure values, json and DecodedValue which is a // little bit chaotic. Think about refactoring this part to only use json // and write DecodedValue to json converter. const std::vector fields = { "wall_time", "parsing_time", "planning_time", "plan_execution_time", }; for (const auto &query_stats : stats) { std::map new_aggregated_query_stats; for (const auto &stats : query_stats.second) { for (const auto &field : fields) { auto it = stats.find(field); if (it != stats.end()) { new_aggregated_query_stats[field] += it->second.ValueDouble(); } } } int64_t new_count = query_stats.second.size(); auto &aggregated_query_stats = aggregated_stats[query_stats.first]; aggregated_query_stats.insert({"count", DecodedValue(0)}); auto old_count = aggregated_query_stats["count"].ValueInt(); aggregated_query_stats["count"].ValueInt() += new_count; for (const auto &stat : new_aggregated_query_stats) { auto it = aggregated_query_stats.insert({stat.first, DecodedValue(0.0)}) .first; it->second = (it->second.ValueDouble() * old_count + stat.second * new_count) / (old_count + new_count); } } out << "{\"num_executed_queries\": " << executed_queries << ", " << "\"elapsed_time\": " << timer.Elapsed().count() << ", \"queries\": ["; utils::PrintIterable(out, aggregated_stats, ", ", [](auto &stream, const auto &x) { stream << "{\"query\": " << nlohmann::json(x.first) << ", \"stats\": "; PrintJsonDecodedValue(stream, DecodedValue(x.second)); stream << "}"; }); out << "]}" << std::endl; out.flush(); std::this_thread::sleep_for(1s); } keep_running = false; for (int i = 0; i < FLAGS_num_workers; ++i) { threads[i].join(); } return 0; }