Do preprocess in client

Reviewers: buda

Reviewed By: buda

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D793
Mislav Bradac 2017-09-13 21:20:03 +02:00
parent 59b9b7af21
commit d640ca3f1a
9 changed files with 150 additions and 116 deletions
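
A note on the overall shape of the change, for readers skimming the diffs below: query-group preprocessing moves from the Python harness into the harness client. The client splits its input into groups on blank (or ";") lines, executes each group, and prints one JSON summary per group on its own output line; the Python QueryClient then collects those lines into data["groups"]. A minimal Python sketch of that reading side, assuming only that "output" is the path of the client's result file as in the first hunk below:

    import json

    def read_group_summaries(output):
        # One JSON object per line, one line per executed query group.
        data = {"groups": []}
        with open(output) as f:
            for line in f:
                data["groups"].append(json.loads(line))
        return data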

View File

@@ -67,8 +67,10 @@ class QueryClient:
str(queries), return_code, stderr)
raise Exception("BoltClient execution failed")
data = {"groups" : []}
with open(output) as f:
data = json.loads(f.read())
for line in f:
data["groups"].append(json.loads(line))
data[CPU_TIME] = cpu_time_end - cpu_time_start
data[MAX_MEMORY] = usage["max_memory"]

View File

@@ -16,8 +16,9 @@ using communication::bolt::DecodedValue;
class BoltClient {
public:
BoltClient(std::string &address, std::string &port, std::string &username,
std::string &password, std::string database = "") {
BoltClient(const std::string &address, const std::string &port,
const std::string &username, const std::string &password,
const std::string & = "") {
SocketT socket;
EndpointT endpoint;

View File

@@ -70,6 +70,7 @@ communication::bolt::QueryData ExecuteNTimesTillSuccess(
std::chrono::milliseconds(rand_dist_(pseudo_rand_gen_)));
}
}
LOG(WARNING) << query << " failed " << times << " times";
throw last_exception;
}
}

View File

@@ -12,6 +12,7 @@
#include <glog/logging.h>
#include <json/json.hpp>
#include "bolt_client.hpp"
#include "common.hpp"
#include "communication/bolt/client.hpp"
#include "communication/bolt/v1/decoder/decoded_value.hpp"
@@ -23,9 +24,6 @@
#include "utils/assert.hpp"
#include "utils/timer.hpp"
using SocketT = io::network::Socket;
using EndpointT = io::network::NetworkEndpoint;
using Client = communication::bolt::Client<SocketT>;
using communication::bolt::DecodedValue;
using communication::bolt::DecodedVertex;
using communication::bolt::DecodedEdge;
@@ -39,7 +37,6 @@ DEFINE_string(password, "", "Password for the database");
DEFINE_int32(duration, 30, "Number of seconds to execute benchmark");
const int MAX_RETRIES = 30;
const int NUM_BUCKETS = 100;
struct VertexAndEdges {
DecodedVertex vertex;
@@ -47,17 +44,23 @@ struct VertexAndEdges {
std::vector<DecodedVertex> vertices;
};
std::pair<VertexAndEdges, int> DetachDeleteVertex(Client &client,
std::pair<VertexAndEdges, int> DetachDeleteVertex(BoltClient &client,
const std::string &label,
int64_t id) {
auto vertex_record =
ExecuteNTimesTillSuccess(
client, "MATCH (n :" + label + " {id : $id}) RETURN n",
std::map<std::string, DecodedValue>{{"id", id}}, MAX_RETRIES)
.records;
CHECK(vertex_record.size() == 1U) << "id : " << id << " "
<< vertex_record.size();
auto records =
ExecuteNTimesTillSuccess(
client, "MATCH (n :" + label + " {id : $id})-[e]-(m) RETURN n, e, m",
std::map<std::string, DecodedValue>{{"id", id}}, MAX_RETRIES)
.records;
if (records.size() == 0U) return {{}, 1};
ExecuteNTimesTillSuccess(
client, "MATCH (n :" + label + " {id : $id})-[]-(m) DETACH DELETE n",
std::map<std::string, DecodedValue>{{"id", id}}, MAX_RETRIES);
@@ -74,10 +77,11 @@ std::pair<VertexAndEdges, int> DetachDeleteVertex(Client &client,
vertices.push_back(record[2].ValueVertex());
}
return {{records[0][0].ValueVertex(), edges, vertices}, 2};
return {{vertex_record[0][0].ValueVertex(), edges, vertices}, 3};
}
int ReturnVertexAndEdges(Client &client, const VertexAndEdges &vertex_and_edges,
int ReturnVertexAndEdges(BoltClient &client,
const VertexAndEdges &vertex_and_edges,
const std::string &independent_label) {
int num_queries = 0;
{
@@ -134,13 +138,35 @@ int ReturnVertexAndEdges(Client &client, const VertexAndEdges &vertex_and_edges,
if (x.type() == DecodedValue::Type::Double) {
LOG_EVERY_N(INFO, 5000) << "exec " << x.ValueDouble() << " planning "
<< y.ValueDouble();
CHECK(ret.records.size() == 1U) << "Graph in invalid state";
}
CHECK(ret.records.size() == 1U)
<< "Graph in invalid state "
<< vertex_and_edges.vertex.properties.at("id");
++num_queries;
}
return num_queries;
}
int64_t NumNodes(BoltClient &client, const std::string &label) {
auto result = ExecuteNTimesTillSuccess(
client, "MATCH (n :" + label + ") RETURN COUNT(n) as cnt", {},
MAX_RETRIES);
return result.records[0][0].ValueInt();
}
std::vector<int64_t> Neighbours(BoltClient &client, const std::string &label,
int64_t id) {
auto result = ExecuteNTimesTillSuccess(
client, "MATCH (n :" + label + " {id: " + std::to_string(id) +
"})-[e]-(m) RETURN m.id",
{}, MAX_RETRIES);
std::vector<int64_t> ret;
for (const auto &record : result.records) {
ret.push_back(record[0].ValueInt());
}
return ret;
}
int main(int argc, char **argv) {
gflags::ParseCommandLineFlags(&argc, &argv, true);
google::InitGoogleLogging(argv[0]);
@@ -149,40 +175,60 @@ int main(int argc, char **argv) {
std::cin >> config;
const auto &queries = config["queries"];
const double read_probability = config["read_probability"];
const int64_t num_independent_nodes = config["num_independent_nodes"];
const std::string independent_label = config["independent_label"];
const int64_t num_nodes = config["num_nodes"];
std::vector<int64_t> independent_nodes_ids;
BoltClient client(FLAGS_address, FLAGS_port, FLAGS_username, FLAGS_password);
const int64_t num_nodes = NumNodes(client, independent_label);
{
std::vector<int64_t> ids;
std::unordered_set<int64_t> independent;
for (int64_t i = 1; i <= num_nodes; ++i) {
ids.push_back(i);
independent.insert(i);
}
{
std::mt19937 mt;
std::shuffle(ids.begin(), ids.end(), mt);
}
for (auto i : ids) {
if (independent.find(i) == independent.end()) continue;
independent.erase(i);
std::vector<int64_t> neighbour_ids =
Neighbours(client, independent_label, i);
independent_nodes_ids.push_back(i);
for (auto j : neighbour_ids) {
independent.erase(j);
}
}
}
utils::Timer timer;
std::vector<std::thread> threads;
std::atomic<int64_t> executed_queries{0};
std::atomic<bool> keep_running{true};
LOG(INFO) << "nodes " << num_nodes << " independent "
<< independent_nodes_ids.size();
int64_t next_to_assign = 0;
for (int i = 0; i < FLAGS_num_workers; ++i) {
int64_t size = independent_nodes_ids.size();
int64_t next_next_to_assign = next_to_assign + size / FLAGS_num_workers +
(i < size % FLAGS_num_workers);
std::vector<int64_t> to_remove(
independent_nodes_ids.begin() + next_to_assign,
independent_nodes_ids.begin() + next_next_to_assign);
LOG(INFO) << next_to_assign << " " << next_next_to_assign;
next_to_assign = next_next_to_assign;
threads.emplace_back(
[&](int thread_id) {
// Initialise client.
SocketT socket;
EndpointT endpoint;
try {
endpoint = EndpointT(FLAGS_address, FLAGS_port);
} catch (const io::network::NetworkEndpointException &e) {
LOG(FATAL) << "Invalid address or port: " << FLAGS_address << ":"
<< FLAGS_port;
}
if (!socket.Connect(endpoint)) {
LOG(FATAL) << "Could not connect to: " << FLAGS_address << ":"
<< FLAGS_port;
}
Client client(std::move(socket), FLAGS_username, FLAGS_password);
[&](int thread_id, std::vector<int64_t> to_remove) {
BoltClient client(FLAGS_address, FLAGS_port, FLAGS_username,
FLAGS_password);
std::mt19937 random_gen(thread_id);
int64_t to_remove =
num_independent_nodes / FLAGS_num_workers * thread_id + 1;
int64_t last_to_remove =
to_remove + num_independent_nodes / FLAGS_num_workers;
bool remove = true;
int64_t num_shifts = 0;
std::vector<VertexAndEdges> removed;
while (keep_running) {
@@ -203,40 +249,34 @@ int main(int argc, char **argv) {
MAX_RETRIES);
++executed_queries;
} else {
if (!remove) {
if (real_dist(random_gen) <
static_cast<double>(removed.size()) /
(removed.size() + to_remove.size())) {
CHECK(removed.size());
std::uniform_int_distribution<> int_dist(0, removed.size() - 1);
std::swap(removed.back(), removed[int_dist(random_gen)]);
executed_queries += ReturnVertexAndEdges(client, removed.back(),
independent_label);
to_remove.push_back(
removed.back().vertex.properties["id"].ValueInt());
removed.pop_back();
if (removed.empty()) {
remove = true;
}
} else {
auto ret =
DetachDeleteVertex(client, independent_label, to_remove);
++to_remove;
CHECK(to_remove.size());
std::uniform_int_distribution<> int_dist(0,
to_remove.size() - 1);
std::swap(to_remove.back(), to_remove[int_dist(random_gen)]);
auto ret = DetachDeleteVertex(client, independent_label,
to_remove.back());
removed.push_back(ret.first);
to_remove.pop_back();
executed_queries += ret.second;
if (ret.second > 1) {
removed.push_back(std::move(ret.first));
}
if (to_remove == last_to_remove) {
for (auto &x : removed) {
x.vertex.properties["id"].ValueInt() += num_nodes;
}
remove = false;
++num_shifts;
to_remove =
num_independent_nodes / FLAGS_num_workers * thread_id +
1 + num_shifts * num_nodes;
last_to_remove =
to_remove + num_independent_nodes / FLAGS_num_workers;
}
}
}
}
client.Close();
},
i);
i, std::move(to_remove));
}
// Open stream for writing stats.
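
For orientation, the id-selection loop added above is a greedy independent-set pass over the shuffled node ids: each picked id removes its neighbours from further consideration, so no two picked vertices share an edge and worker threads can detach-delete them without interfering. A rough Python sketch of the same idea, where neighbours() stands in for the Cypher lookup the C++ Neighbours() helper performs:

    import random

    def pick_independent_ids(num_nodes, neighbours, seed=0):
        # Shuffle ids 1..num_nodes, then greedily keep an id and drop its
        # neighbours so that no two kept ids are adjacent.
        ids = list(range(1, num_nodes + 1))
        random.Random(seed).shuffle(ids)
        candidates = set(ids)
        independent = []
        for i in ids:
            if i not in candidates:
                continue
            candidates.discard(i)
            independent.append(i)
            for j in neighbours(i):
                candidates.discard(j)
        return independent

The selected ids are then split across workers; the next_next_to_assign arithmetic above gives each of the first size % FLAGS_num_workers threads one extra id, so partition sizes differ by at most one.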

View File

@@ -28,8 +28,9 @@ class ClientQueryException : public ClientException {
class Client {
public:
Client(std::string &host, std::string &port, std::string &username,
std::string &password, std::string database = "") {
Client(const std::string &host, const std::string &port,
const std::string &username, const std::string &password,
const std::string &database = "") {
// https://www.postgresql.org/docs/9.4/static/libpq-connect.html#LIBPQ-PARAMKEYWORDS
std::string pass = "";
if (password != "") {

View File

@@ -8,6 +8,7 @@
#include "communication/bolt/v1/decoder/decoded_value.hpp"
#include "threading/sync/spinlock.hpp"
#include "utils/algorithm.hpp"
#include "utils/string.hpp"
#include "utils/timer.hpp"
#include "bolt_client.hpp"
@@ -45,25 +46,20 @@ void PrintSummary(
os << "{\"wall_time\": " << duration << ", "
<< "\"metadatas\": ";
PrintJsonMetadata(os, metadata);
os << "}";
os << "}\n";
}
template <typename ClientT, typename ExceptionT>
void ExecuteQueries(std::istream &istream, int num_workers,
template <typename ClientT>
void ExecuteQueries(const std::vector<std::string> &queries, int num_workers,
std::ostream &ostream, std::string &address,
std::string &port, std::string &username,
std::string &password, std::string &database) {
std::string query;
std::vector<std::thread> threads;
SpinLock spinlock;
uint64_t last = 0;
std::vector<std::string> queries;
std::vector<std::map<std::string, DecodedValue>> metadata;
while (std::getline(istream, query)) {
queries.push_back(query);
}
metadata.resize(queries.size());
utils::Timer timer;
@@ -86,7 +82,7 @@ void ExecuteQueries(std::istream &istream, int num_workers,
try {
metadata[pos] =
ExecuteNTimesTillSuccess(client, str, {}, MAX_RETRIES).metadata;
} catch (const ExceptionT &e) {
} catch (const utils::BasicException &e) {
LOG(FATAL) << "Could not execute query '" << str << "' "
<< MAX_RETRIES << " times! Error message: " << e.what();
}
@@ -128,27 +124,38 @@ int main(int argc, char **argv) {
std::string port = FLAGS_port;
if (FLAGS_protocol == "bolt") {
if (port == "") port = "7687";
using BoltClientT = BoltClient;
using BoltExceptionT = communication::bolt::ClientQueryException;
ExecuteQueries<BoltClientT, BoltExceptionT>(
*istream, FLAGS_num_workers, *ostream, FLAGS_address, port,
FLAGS_username, FLAGS_password, FLAGS_database);
} else if (FLAGS_protocol == "postgres") {
LOG(FATAL) << "Postgres not yet supported";
// TODO: Currently libpq is linked dynamically so it is a pain to move
// harness_client executable to other machines without libpq.
// CHECK(FLAGS_username != "") << "Username can't be empty for
// postgres!";
// CHECK(FLAGS_database != "") << "Database can't be empty for
// postgres!";
// if (port == "") port = "5432";
//
// using PostgresClientT = postgres::Client;
// using PostgresExceptionT = postgres::ClientQueryException;
// ExecuteQueries<PostgresClientT, PostgresExceptionT>(
// *istream, FLAGS_num_workers, *ostream, FLAGS_address, port,
// FLAGS_username, FLAGS_password, FLAGS_database);
if (port == "") port = "5432";
}
while (!istream->eof()) {
std::vector<std::string> queries;
std::string query;
while (std::getline(*istream, query) && utils::Trim(query) != "" &&
utils::Trim(query) != ";") {
queries.push_back(query);
}
if (FLAGS_protocol == "bolt") {
ExecuteQueries<BoltClient>(queries, FLAGS_num_workers, *ostream,
FLAGS_address, port, FLAGS_username,
FLAGS_password, FLAGS_database);
} else if (FLAGS_protocol == "postgres") {
LOG(FATAL) << "Postgres not yet supported";
// TODO: Currently libpq is linked dynamically so it is a pain to move
// harness_client executable to other machines without libpq.
// CHECK(FLAGS_username != "") << "Username can't be empty for
// postgres!";
// CHECK(FLAGS_database != "") << "Database can't be empty for
// postgres!";
// if (port == "") port = "5432";
//
// using PostgresClientT = postgres::Client;
// using PostgresExceptionT = postgres::ClientQueryException;
// ExecuteQueries<PostgresClientT, PostgresExceptionT>(
// *istream, FLAGS_num_workers, *ostream, FLAGS_address, port,
// FLAGS_username, FLAGS_password, FLAGS_database);
}
}
return 0;

View File

@@ -1,6 +1,4 @@
{
"num_independent_nodes" : 4111,
"num_nodes" : 10000,
"independent_label": "User",
"read_probability": 0.5,
"queries" : [

View File

@@ -24,33 +24,14 @@ class LongRunningSuite:
def run(self, scenario, group_name, scenario_name, runner):
runner.start()
# This suite allows empty lines in setup. Those lines separate query
# groups. It is guaranteed that groups will be executed sequentially,
# but queries in each group are possibly executed concurrently.
query_groups = [[]]
for query in scenario.get("setup")():
if query == "":
query_groups.append([])
else:
query_groups[-1].append(query)
if query_groups[-1] == []:
query_groups.pop()
log.info("Executing {} query groups in setup"
.format(len(query_groups)))
for i, queries in enumerate(query_groups):
start_time = time.time()
# TODO: number of threads configurable
runner.setup(queries, self.args.num_client_workers)
log.info("\t{}. group imported in done in {:.2f} seconds".format(
i + 1, time.time() - start_time))
log.info("Executing setup")
runner.setup(scenario.get("setup")(), self.args.num_client_workers)
config = next(scenario.get("config")())
duration = config["duration"]
log.info("Executing run for {} seconds with {} client workers".format(
duration, self.args.num_client_workers))
# TODO: number of threads configurable
results = runner.run(next(scenario.get("run")()), duration,
self.args.num_client_workers)
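
The blank-line grouping removed from this suite (the deleted comment above describes the guarantee: groups run sequentially, queries within a group possibly concurrently) now happens in the client's main loop shown earlier, which reads queries until a blank or ";" line. A small, purely illustrative Python sketch of that grouping rule:

    def split_into_groups(lines):
        # A group ends at a blank line or a lone ";"; empty groups are dropped.
        groups, current = [], []
        for line in lines:
            if line.strip() in ("", ";"):
                if current:
                    groups.append(current)
                    current = []
            else:
                current.append(line)
        if current:
            groups.append(current)
        return groups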

View File

@@ -91,14 +91,17 @@ class _QuerySuite:
execute("itersetup")
run_result = execute("run",
scenario_config.get("num_client_workers", 1))
add_measurement(run_result, iteration, WALL_TIME)
add_measurement(run_result, iteration, CPU_TIME)
add_measurement(run_result, iteration, MAX_MEMORY)
assert len(run_result["groups"]) == 1, \
"Multiple groups in run step not yet supported"
group = run_result["groups"][0]
add_measurement(group, iteration, WALL_TIME)
for measurement in ["parsing_time",
"plan_execution_time",
"planning_time"] :
for i in range(len(run_result.get("metadatas", []))):
add_measurement(run_result["metadatas"][i], iteration,
for i in range(len(group.get("metadatas", []))):
add_measurement(group["metadatas"][i], iteration,
measurement)
execute("iterteardown")