Add parallel customers/Otto test
Summary:
Looking for connected components in a random graph. This test performs the following:
- Generates a random graph that is NOT sequential in memory (otherwise iteration over edges is 2 or more times faster).
- Connectivity by iterating over all the edges.
- Ditto over vertices.
- Ditto over vertices in parallel.
Not done:
- Edge filtering based on XY. I could/should add that to see how it affects perf.
- Getting component info out from union-find.
Local results are encouraging. Iterating over the graph is the bottleneck. Still, I get connectivity of 10M vertices/edges in <7sec (parallel over vertices). Will test on 250M remote now.
Locally obtained results (20M/20M, 2 threads)
```
I1115 14:57:55.136875 357 otto_parallel.cpp:50] Generating 2000000 vertices...
I1115 14:58:19.057734 357 otto_parallel.cpp:74] Generated 2000000 vertices in 23.9208 seconds.
I1115 14:58:19.919221 357 otto_parallel.cpp:82] Generating 2000000 edges...
I1115 14:58:39.519951 357 otto_parallel.cpp:93] Generated 2000000 edges in 19.3398 seconds.
I1115 14:58:39.520349 357 otto_parallel.cpp:196] Running Edge iteration...
I1115 14:58:43.857264 357 otto_parallel.cpp:199] Done in 4.33691 seconds, result: 3999860270398
I1115 14:58:43.857316 357 otto_parallel.cpp:196] Running Vertex iteration...
I1115 14:58:49.498181 357 otto_parallel.cpp:199] Done in 5.64087 seconds, result: 4000090070787
I1115 14:58:49.498208 357 otto_parallel.cpp:196] Running Connected components - Edges...
I1115 14:58:54.232530 357 otto_parallel.cpp:199] Done in 4.73433 seconds, result: 323935
I1115 14:58:54.232570 357 otto_parallel.cpp:196] Running Connected components - Vertices...
I1115 14:59:00.412395 357 otto_parallel.cpp:199] Done in 6.17983 seconds, result: 323935
I1115 14:59:00.412422 357 otto_parallel.cpp:196] Running Parallel connected components - Vertices...
I1115 14:59:04.662087 357 otto_parallel.cpp:199] Done in 4.24967 seconds, result: 323935
I1115 14:59:04.662116 357 otto_parallel.cpp:196] Running Expansion...
I1115 14:59:13.913015 357 otto_parallel.cpp:199] Done in 9.25091 seconds, result: 323935
```
Reviewers: buda, mislav.bradac, dgleich, teon.banek
Reviewed By: buda, teon.banek
Subscribers: teon.banek, pullbot
Differential Revision: https://phabricator.memgraph.io/D982
2017-11-22 17:04:12 +08:00
|
|
|
#include <algorithm>
|
|
|
|
#include <limits>
|
|
|
|
#include <mutex>
|
|
|
|
#include <random>
|
|
|
|
#include <set>
|
|
|
|
#include <stack>
|
|
|
|
#include <thread>
|
|
|
|
|
|
|
|
#include "gflags/gflags.h"
|
|
|
|
#include "glog/logging.h"
|
|
|
|
|
|
|
|
#include "data_structures/union_find.hpp"
|
|
|
|
#include "database/graph_db.hpp"
|
|
|
|
#include "database/graph_db_accessor.hpp"
|
|
|
|
#include "storage/property_value.hpp"
|
|
|
|
#include "threading/sync/spinlock.hpp"
|
|
|
|
#include "utils/bound.hpp"
|
|
|
|
#include "utils/timer.hpp"
|
|
|
|
|
|
|
|
// Command-line knobs controlling the size of the generated graph and the
// number of worker threads used for generation and the parallel benchmark.
DEFINE_int32(thread_count, 1, "Number of threads");
DEFINE_int32(vertex_count, 1000, "Number of vertices");
DEFINE_int32(edge_count, 1000, "Number of edges");

// Declared here, defined elsewhere; main() overrides it to -1 before the
// database is constructed.
DECLARE_int32(gc_cycle_sec);

// Names of the label and the property attached to every generated vertex.
// The (label, property) pair is indexed and later used for range scans.
static const std::string kLabel = "kLabel";
static const std::string kProperty = "kProperty";
|
|
|
|
|
|
|
|
/// Fills `db` with the random graph that all benchmarks in this file run on.
///
/// Steps, each in its own transaction (GraphDbAccessor):
///  1. Build an index on (kLabel, kProperty).
///  2. Insert FLAGS_vertex_count vertices using FLAGS_thread_count threads.
///     Every vertex gets kLabel and kProperty set to its own id.
///  3. Insert FLAGS_edge_count edges between uniformly random vertex pairs.
///
/// Vertex and edge ids are handed out in shuffled order so that creation
/// order does not match id order.
///
/// Aborts (via CHECK) when FLAGS_thread_count is not positive, when it does
/// not divide FLAGS_vertex_count, or when edges are requested for an empty
/// vertex set.
void GenerateGraph(GraphDb &db) {
  {
    GraphDbAccessor dba{db};
    dba.BuildIndex(dba.Label(kLabel), dba.Property(kProperty));
    dba.Commit();
  }

  // Randomize the sequence of IDs of created vertices and edges to simulate
  // real-world lack of locality.
  auto make_id_vector = [](size_t size) {
    gid::Generator generator{0};
    std::vector<gid::Gid> ids(size);
    for (size_t i = 0; i < size; ++i)
      ids[i] = generator.Next(std::experimental::nullopt);
    // std::random_shuffle was deprecated in C++14 and removed in C++17;
    // std::shuffle with an explicit engine is the supported replacement.
    std::mt19937 shuffle_rng{std::random_device{}()};
    std::shuffle(ids.begin(), ids.end(), shuffle_rng);
    return ids;
  };

  std::vector<VertexAccessor> vertices;
  vertices.reserve(FLAGS_vertex_count);
  {
    // Guard the modulo and the per-thread batch division below against a
    // zero (or negative) thread count.
    CHECK(FLAGS_thread_count > 0) << "Thread count must be positive";
    CHECK(FLAGS_vertex_count % FLAGS_thread_count == 0)
        << "Thread count must be a factor of vertex count";
    LOG(INFO) << "Generating " << FLAGS_vertex_count << " vertices...";
    utils::Timer timer;
    auto vertex_ids = make_id_vector(FLAGS_vertex_count);

    std::vector<std::thread> threads;
    SpinLock vertices_lock;
    for (int i = 0; i < FLAGS_thread_count; ++i) {
      threads.emplace_back([&db, &vertex_ids, &vertices, &vertices_lock, i]() {
        // Each worker inserts its own contiguous slice of the shuffled ids
        // inside its own transaction.
        GraphDbAccessor dba{db};
        auto label = dba.Label(kLabel);
        auto property = dba.Property(kProperty);
        auto batch_size = FLAGS_vertex_count / FLAGS_thread_count;
        for (int j = i * batch_size; j < (i + 1) * batch_size; ++j) {
          auto vertex = dba.InsertVertex(vertex_ids[j]);
          vertex.add_label(label);
          // The property mirrors the vertex id so the parallel benchmark can
          // partition vertices by property range.
          vertex.PropsSet(property, static_cast<int64_t>(vertex_ids[j]));
          // `vertices` is shared between workers; the guard releases the
          // lock even if emplace_back throws.
          std::lock_guard<SpinLock> guard{vertices_lock};
          vertices.emplace_back(vertex);
        }
        dba.Commit();
      });
    }
    for (auto &t : threads) t.join();
    LOG(INFO) << "Generated " << FLAGS_vertex_count << " vertices in "
              << timer.Elapsed().count() << " seconds.";
  }
  {
    GraphDbAccessor dba{db};
    // The stored accessors were created by the (now committed) worker
    // transactions; Transfer() presumably rebinds them to this accessor's
    // transaction -- confirm against GraphDbAccessor.
    for (int i = 0; i < FLAGS_vertex_count; ++i)
      vertices[i] = *dba.Transfer(vertices[i]);

    LOG(INFO) << "Generating " << FLAGS_edge_count << " edges...";
    // uniform_int_distribution requires a non-empty range [0, count - 1].
    CHECK(FLAGS_edge_count <= 0 || FLAGS_vertex_count > 0)
        << "Edges need at least one vertex to connect";
    auto edge_ids = make_id_vector(FLAGS_edge_count);
    std::mt19937 pseudo_rand_gen{std::random_device{}()};
    std::uniform_int_distribution<> rand_dist{0, FLAGS_vertex_count - 1};
    auto edge_type = dba.EdgeType("edge");
    utils::Timer timer;
    for (int i = 0; i < FLAGS_edge_count; ++i)
      dba.InsertEdge(vertices[rand_dist(pseudo_rand_gen)],
                     vertices[rand_dist(pseudo_rand_gen)], edge_type,
                     edge_ids[i]);
    dba.Commit();
    LOG(INFO) << "Generated " << FLAGS_edge_count << " edges in "
              << timer.Elapsed().count() << " seconds.";
  }
}
|
|
|
|
|
|
|
|
/// Baseline benchmark: walks every edge returned by `Edges(false)` and
/// accumulates the ids of both endpoints.
///
/// @return the sum of `from().gid() + to().gid()` over all edges, as int64_t.
///         The value has no meaning beyond keeping the loop from being
///         optimized away and giving a number to log.
auto EdgeIteration(GraphDb &db) {
  GraphDbAccessor accessor{db};
  int64_t total{0};
  for (auto current_edge : accessor.Edges(false)) {
    total += current_edge.from().gid() + current_edge.to().gid();
  }
  return total;
}
|
|
|
|
|
|
|
|
/// Baseline benchmark: walks every vertex returned by `Vertices(false)` and,
/// for each of its outgoing edges, accumulates the edge id and the id of the
/// destination vertex.
///
/// @return the sum of `e.gid() + e.to().gid()` over all outgoing edges, as
///         int64_t. Note this is a different quantity from the one computed
///         by EdgeIteration, so the two results are not expected to match.
auto VertexIteration(GraphDb &db) {
  GraphDbAccessor accessor{db};
  int64_t total{0};
  for (auto vertex : accessor.Vertices(false)) {
    for (auto out_edge : vertex.out()) {
      total += out_edge.gid() + out_edge.to().gid();
    }
  }
  return total;
}
|
|
|
|
|
|
|
|
/// Connectivity benchmark driven by a single pass over all edges.
///
/// Creates a union-find structure sized by FLAGS_vertex_count and connects
/// the two endpoint ids of every edge returned by `Edges(false)`.
///
/// @return `connectivity.Size()` -- presumably the number of disjoint sets
///         left, i.e. the component count; confirm against UnionFind.
auto ConnectedComponentsEdges(GraphDb &db) {
  UnionFind<int64_t> connectivity{FLAGS_vertex_count};
  GraphDbAccessor accessor{db};
  for (auto current_edge : accessor.Edges(false)) {
    connectivity.Connect(current_edge.from().gid(), current_edge.to().gid());
  }
  return connectivity.Size();
}
|
|
|
|
|
|
|
|
/// Connectivity benchmark driven by a pass over all vertices.
///
/// For every vertex returned by `Vertices(false)`, connects the vertex id to
/// the destination id of each of its outgoing edges in a union-find
/// structure sized by FLAGS_vertex_count.
///
/// @return `connectivity.Size()` -- presumably the number of disjoint sets
///         left, i.e. the component count; confirm against UnionFind.
auto ConnectedComponentsVertices(GraphDb &db) {
  UnionFind<int64_t> connectivity{FLAGS_vertex_count};
  GraphDbAccessor accessor{db};
  for (auto source : accessor.Vertices(false)) {
    for (auto outgoing : source.out()) {
      connectivity.Connect(source.gid(), outgoing.to().gid());
    }
  }
  return connectivity.Size();
}
|
|
|
|
|
|
|
|
/// Multi-threaded variant of ConnectedComponentsVertices.
///
/// The kProperty value range is cut into FLAGS_thread_count slices. Each
/// thread opens its own GraphDbAccessor, scans the vertices whose kProperty
/// lies in its slice, and connects every vertex to the destinations of its
/// outgoing edges. All threads share one union-find structure; each
/// Connect() call is made while holding a spinlock.
///
/// @return `connectivity.Size()` after all threads have joined -- presumably
///         the component count; confirm against UnionFind.
auto ConnectedComponentsVerticesParallel(GraphDb &db) {
  UnionFind<int64_t> connectivity{FLAGS_vertex_count};
  SpinLock connectivity_lock;

  // Slice boundaries on the property value: slice k covers
  // [bounds[k], bounds[k + 1]). The last upper bound is the int64_t maximum.
  std::vector<PropertyValue> bounds;
  for (int64_t slice = 0; slice < FLAGS_thread_count; ++slice) {
    bounds.emplace_back(slice * FLAGS_vertex_count / FLAGS_thread_count);
  }
  bounds.emplace_back(std::numeric_limits<int64_t>::max());

  std::vector<std::thread> workers;
  for (int i = 0; i < FLAGS_thread_count; ++i) {
    workers.emplace_back([&connectivity, &connectivity_lock, &bounds, &db,
                          i]() {
      GraphDbAccessor dba{db};
      for (auto source : dba.Vertices(
               dba.Label(kLabel), dba.Property(kProperty),
               utils::MakeBoundInclusive(bounds[i]),
               utils::MakeBoundExclusive(bounds[i + 1]), false)) {
        for (auto outgoing : source.out()) {
          // The union-find is shared by all workers, so every update is
          // serialized through the spinlock.
          std::lock_guard<SpinLock> guard{connectivity_lock};
          connectivity.Connect(source.gid(), outgoing.to().gid());
        }
      }
    });
  }
  for (auto &worker : workers) worker.join();
  return connectivity.Size();
}
|
|
|
|
|
|
|
|
/// Connectivity benchmark based on graph traversal instead of union-find.
///
/// Keeps one component id per vertex (indexed by vertex gid, -1 meaning
/// "not yet assigned"). For each still-unassigned vertex a fresh component
/// id is allocated and a stack-based traversal follows both outgoing and
/// incoming edges, labelling every vertex it reaches.
///
/// @return the number of component ids that were allocated.
auto Expansion(GraphDb &db) {
  std::vector<int> component_ids(FLAGS_vertex_count, -1);
  int next_component_id{0};
  std::stack<VertexAccessor> pending;
  GraphDbAccessor accessor{db};

  for (auto seed : accessor.Vertices(false)) {
    // Already labelled by a traversal started from an earlier seed.
    if (component_ids[seed.gid()] != -1) continue;

    auto current_component = next_component_id++;
    pending.push(seed);
    while (!pending.empty()) {
      auto current = pending.top();
      pending.pop();
      // A vertex can be pushed several times before it is first popped;
      // only the first pop labels and expands it.
      if (component_ids[current.gid()] != -1) continue;
      component_ids[current.gid()] = current_component;
      // Follow edges in both directions so that edge direction is ignored.
      for (auto out_edge : current.out()) pending.push(out_edge.to());
      for (auto in_edge : current.in()) pending.push(in_edge.from());
    }
  }

  return next_component_id;
}
|
|
|
|
|
|
|
|
int main(int argc, char **argv) {
|
|
|
|
gflags::ParseCommandLineFlags(&argc, &argv, true);
|
|
|
|
google::InitGoogleLogging(argv[0]);
|
|
|
|
FLAGS_gc_cycle_sec = -1;
|
|
|
|
|
|
|
|
GraphDb db;
|
|
|
|
GenerateGraph(db);
|
|
|
|
auto timed_call = [&db](auto callable, const std::string &descr) {
|
|
|
|
LOG(INFO) << "Running " << descr << "...";
|
|
|
|
utils::Timer timer;
|
|
|
|
auto result = callable(db);
|
|
|
|
LOG(INFO) << "\tDone in " << timer.Elapsed().count()
|
|
|
|
<< " seconds, result: " << result;
|
|
|
|
};
|
|
|
|
timed_call(EdgeIteration, "Edge iteration");
|
|
|
|
timed_call(VertexIteration, "Vertex iteration");
|
|
|
|
timed_call(ConnectedComponentsEdges, "Connected components - Edges");
|
|
|
|
timed_call(ConnectedComponentsVertices, "Connected components - Vertices");
|
|
|
|
timed_call(ConnectedComponentsVerticesParallel,
|
|
|
|
"Parallel connected components - Vertices");
|
|
|
|
timed_call(Expansion, "Expansion");
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|