Implement Louvain as a query module
Reviewers: teon.banek, dsantl
Reviewed By: teon.banek, dsantl
Subscribers: pullbot
Differential Revision: https://phabricator.memgraph.io/D2574
This commit is contained in:
parent
e605aed497
commit
f6b6ea254d
@ -7,7 +7,6 @@ include_directories(src)
|
|||||||
|
|
||||||
add_library(louvain-core STATIC ${SOURCES})
|
add_library(louvain-core STATIC ${SOURCES})
|
||||||
set_target_properties(louvain-core PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
set_target_properties(louvain-core PROPERTIES POSITION_INDEPENDENT_CODE ON)
|
||||||
target_link_libraries(louvain-core PUBLIC Threads::Threads glog gflags)
|
|
||||||
|
|
||||||
add_executable(louvain-main ${MAIN})
|
add_executable(louvain-main ${MAIN})
|
||||||
target_link_libraries(louvain-main louvain-core)
|
target_link_libraries(louvain-main louvain-core)
|
||||||
@ -16,6 +15,7 @@ enable_testing()
|
|||||||
add_subdirectory(test)
|
add_subdirectory(test)
|
||||||
|
|
||||||
add_library(louvain SHARED ${MODULE})
|
add_library(louvain SHARED ${MODULE})
|
||||||
|
target_link_libraries(louvain louvain-core)
|
||||||
target_include_directories(louvain PRIVATE ${CMAKE_SOURCE_DIR}/include)
|
target_include_directories(louvain PRIVATE ${CMAKE_SOURCE_DIR}/include)
|
||||||
|
|
||||||
if (NOT MG_COMMUNITY)
|
if (NOT MG_COMMUNITY)
|
||||||
|
@ -5,8 +5,6 @@
|
|||||||
#include <random>
|
#include <random>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
|
|
||||||
#include <glog/logging.h>
|
|
||||||
|
|
||||||
namespace {
|
namespace {
|
||||||
|
|
||||||
void OptimizeLocally(comdata::Graph *graph) {
|
void OptimizeLocally(comdata::Graph *graph) {
|
||||||
|
@ -1,12 +1,11 @@
|
|||||||
#include "data_structures/graph.hpp"
|
#include "data_structures/graph.hpp"
|
||||||
|
|
||||||
|
#include <exception>
|
||||||
#include <numeric>
|
#include <numeric>
|
||||||
#include <unordered_map>
|
#include <unordered_map>
|
||||||
#include <unordered_set>
|
#include <unordered_set>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
#include <glog/logging.h>
|
|
||||||
|
|
||||||
namespace comdata {
|
namespace comdata {
|
||||||
|
|
||||||
Graph::Graph(uint32_t n_nodes) : n_nodes_(n_nodes), total_w_(0) {
|
Graph::Graph(uint32_t n_nodes) : n_nodes_(n_nodes), total_w_(0) {
|
||||||
@ -22,15 +21,9 @@ uint32_t Graph::Size() const {
|
|||||||
return n_nodes_;
|
return n_nodes_;
|
||||||
}
|
}
|
||||||
|
|
||||||
uint32_t Graph::Community(uint32_t node) const {
|
uint32_t Graph::Community(uint32_t node) const { return community_.at(node); }
|
||||||
CHECK(node < n_nodes_) << "Node index out of range";
|
|
||||||
return community_[node];
|
|
||||||
}
|
|
||||||
|
|
||||||
void Graph::SetCommunity(uint32_t node, uint32_t c) {
|
void Graph::SetCommunity(uint32_t node, uint32_t c) { community_.at(node) = c; }
|
||||||
CHECK(node < n_nodes_) << "Node index out of range";
|
|
||||||
community_[node] = c;
|
|
||||||
}
|
|
||||||
|
|
||||||
uint32_t Graph::NormalizeCommunities() {
|
uint32_t Graph::NormalizeCommunities() {
|
||||||
std::set<uint32_t> c_id(community_.begin(), community_.end());
|
std::set<uint32_t> c_id(community_.begin(), community_.end());
|
||||||
@ -46,10 +39,11 @@ uint32_t Graph::NormalizeCommunities() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
void Graph::AddEdge(uint32_t node1, uint32_t node2, double weight) {
|
void Graph::AddEdge(uint32_t node1, uint32_t node2, double weight) {
|
||||||
CHECK(node1 < n_nodes_) << "Node index out of range";
|
if (node1 >= n_nodes_ || node2 >= n_nodes_)
|
||||||
CHECK(node2 < n_nodes_) << "Node index out of range";
|
throw std::out_of_range("Node index out of range");
|
||||||
CHECK(weight > 0) << "Weights must be positive";
|
if (weight <= 0) throw std::out_of_range("Weights must be positive");
|
||||||
CHECK(edges_.find({node1, node2}) == edges_.end()) << "Edge already exists";
|
if (edges_.find({node1, node2}) != edges_.end())
|
||||||
|
throw std::invalid_argument("Edge already exists");
|
||||||
|
|
||||||
edges_.emplace(node1, node2);
|
edges_.emplace(node1, node2);
|
||||||
edges_.emplace(node2, node1);
|
edges_.emplace(node2, node1);
|
||||||
@ -66,13 +60,11 @@ void Graph::AddEdge(uint32_t node1, uint32_t node2, double weight) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
uint32_t Graph::Degree(uint32_t node) const {
|
uint32_t Graph::Degree(uint32_t node) const {
|
||||||
CHECK(node < n_nodes_) << "Node index out of range";
|
return static_cast<uint32_t>(adj_list_.at(node).size());
|
||||||
return static_cast<uint32_t>(adj_list_[node].size());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
double Graph::IncidentWeight(uint32_t node) const {
|
double Graph::IncidentWeight(uint32_t node) const {
|
||||||
CHECK(node < n_nodes_) << "Node index out of range";
|
return inc_w_.at(node);
|
||||||
return inc_w_[node];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
double Graph::TotalWeight() const {
|
double Graph::TotalWeight() const {
|
||||||
@ -98,9 +90,8 @@ double Graph::Modularity() const {
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
const std::vector<Neighbour>& Graph::Neighbours(uint32_t node) const {
|
const std::vector<Neighbour> &Graph::Neighbours(uint32_t node) const {
|
||||||
CHECK(node < n_nodes_) << "Node index out of range";
|
return adj_list_.at(node);
|
||||||
return adj_list_[node];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} // namespace comdata
|
} // namespace comdata
|
||||||
|
@ -40,16 +40,24 @@ public:
|
|||||||
/// @param node1 index of an incident node.
|
/// @param node1 index of an incident node.
|
||||||
/// @param node2 index of an incident node.
|
/// @param node2 index of an incident node.
|
||||||
/// @param weight real value which represents the weight of the edge.
|
/// @param weight real value which represents the weight of the edge.
|
||||||
|
///
|
||||||
|
/// @throw std::out_of_range
|
||||||
|
/// @throw std::invalid_argument
|
||||||
void AddEdge(uint32_t node1, uint32_t node2, double weight);
|
void AddEdge(uint32_t node1, uint32_t node2, double weight);
|
||||||
|
|
||||||
/// @param node index of node.
|
/// @param node index of node.
|
||||||
|
///
|
||||||
/// @return community where the node belongs to.
|
/// @return community where the node belongs to.
|
||||||
|
///
|
||||||
|
/// @throw std::out_of_range
|
||||||
uint32_t Community(uint32_t node) const;
|
uint32_t Community(uint32_t node) const;
|
||||||
|
|
||||||
/// Adds a given node to a given community.
|
/// Adds a given node to a given community.
|
||||||
///
|
///
|
||||||
/// @param node index of node.
|
/// @param node index of node.
|
||||||
/// @param c community where the given node should go in.
|
/// @param c community where the given node should go in.
|
||||||
|
///
|
||||||
|
/// @throw std::out_of_range
|
||||||
void SetCommunity(uint32_t node, uint32_t c);
|
void SetCommunity(uint32_t node, uint32_t c);
|
||||||
|
|
||||||
/// Normalizes the values of communities. More precisely, after invoking this
|
/// Normalizes the values of communities. More precisely, after invoking this
|
||||||
@ -65,14 +73,20 @@ public:
|
|||||||
/// contribute a single edge to the degree.
|
/// contribute a single edge to the degree.
|
||||||
///
|
///
|
||||||
/// @param node index of node.
|
/// @param node index of node.
|
||||||
|
///
|
||||||
/// @return degree of given node.
|
/// @return degree of given node.
|
||||||
|
///
|
||||||
|
/// @throw std::out_of_range
|
||||||
uint32_t Degree(uint32_t node) const;
|
uint32_t Degree(uint32_t node) const;
|
||||||
|
|
||||||
/// Returns the total weight of incident edges to a given node. Weight
|
/// Returns the total weight of incident edges to a given node. Weight
|
||||||
/// of a self loop contributes once to the total sum.
|
/// of a self loop contributes once to the total sum.
|
||||||
///
|
///
|
||||||
/// @param node index of node.
|
/// @param node index of node.
|
||||||
|
///
|
||||||
/// @return total incident weight of a given node.
|
/// @return total incident weight of a given node.
|
||||||
|
///
|
||||||
|
/// @throw std::out_of_range
|
||||||
double IncidentWeight(uint32_t node) const;
|
double IncidentWeight(uint32_t node) const;
|
||||||
|
|
||||||
/// @return total weight of all edges in a graph.
|
/// @return total weight of all edges in a graph.
|
||||||
@ -91,7 +105,10 @@ public:
|
|||||||
/// Returns nodes adjacent to a given node.
|
/// Returns nodes adjacent to a given node.
|
||||||
///
|
///
|
||||||
/// @param node index of node.
|
/// @param node index of node.
|
||||||
|
///
|
||||||
/// @return list of neighbouring nodes.
|
/// @return list of neighbouring nodes.
|
||||||
|
///
|
||||||
|
/// @throw std::out_of_range
|
||||||
const std::vector<Neighbour>& Neighbours(uint32_t node) const;
|
const std::vector<Neighbour>& Neighbours(uint32_t node) const;
|
||||||
|
|
||||||
private:
|
private:
|
||||||
|
@ -1,19 +1,128 @@
|
|||||||
#include "mg_procedure.h"
|
#include "mg_procedure.h"
|
||||||
|
|
||||||
|
#include <exception>
|
||||||
|
#include <unordered_map>
|
||||||
|
|
||||||
|
#include "algorithms/algorithms.hpp"
|
||||||
|
#include "data_structures/graph.hpp"
|
||||||
|
|
||||||
static void communities(const mgp_list *args, const mgp_graph *graph,
|
static void communities(const mgp_list *args, const mgp_graph *graph,
|
||||||
mgp_result *result, mgp_memory *memory) {
|
mgp_result *result, mgp_memory *memory) {
|
||||||
mgp_result_record *record = mgp_result_new_record(result);
|
mgp_vertices_iterator *vertices_iterator =
|
||||||
mgp_value *hello_world_value =
|
mgp_graph_iter_vertices(graph, memory);
|
||||||
mgp_value_make_string("Louvain, fuck yeah!", memory);
|
if (vertices_iterator == nullptr) {
|
||||||
mgp_result_record_insert(record, "result", hello_world_value);
|
mgp_result_set_error_msg(result, "Not enough memory!");
|
||||||
mgp_value_destroy(hello_world_value);
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Normalize vertex ids
|
||||||
|
std::unordered_map<int64_t, uint32_t> mem_to_louv_id;
|
||||||
|
|
||||||
|
uint32_t louv_id = 0;
|
||||||
|
for (const mgp_vertex *vertex = mgp_vertices_iterator_get(vertices_iterator);
|
||||||
|
vertex != nullptr; vertex = mgp_vertices_iterator_next(vertices_iterator)) {
|
||||||
|
mgp_vertex_id mem_id = mgp_vertex_get_id(vertex);
|
||||||
|
mem_to_louv_id[mem_id.as_int] = louv_id;
|
||||||
|
++louv_id;
|
||||||
|
}
|
||||||
|
|
||||||
|
mgp_vertices_iterator_destroy(vertices_iterator);
|
||||||
|
|
||||||
|
// Extract the graph structure
|
||||||
|
// TODO(ipaljak): consider filtering nodes and edges by labels.
|
||||||
|
comdata::Graph louvain_graph(louv_id);
|
||||||
|
for (const auto &p : mem_to_louv_id) {
|
||||||
|
mgp_vertex *vertex = mgp_graph_get_vertex_by_id(graph, {p.first}, memory);
|
||||||
|
if (!vertex) {
|
||||||
|
mgp_result_set_error_msg(result, "Not enough memory!");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Iterate over inbound edges only. This is enough because every outbound
|
||||||
|
// edge of this vertex is visited as an inbound edge of its other endpoint.
|
||||||
|
mgp_edges_iterator *edges_iterator =
|
||||||
|
mgp_vertex_iter_in_edges(vertex, memory);
|
||||||
|
if (edges_iterator == nullptr) {
|
||||||
|
mgp_vertex_destroy(vertex);
|
||||||
|
mgp_result_set_error_msg(result, "Not enough memory!");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const mgp_edge *edge = mgp_edges_iterator_get(edges_iterator);
|
||||||
|
edge != nullptr; edge = mgp_edges_iterator_next(edges_iterator)) {
|
||||||
|
const mgp_vertex *next_vertex = mgp_edge_get_from(edge);
|
||||||
|
mgp_vertex_id next_mem_id = mgp_vertex_get_id(next_vertex);
|
||||||
|
uint32_t next_louv_id = mem_to_louv_id[next_mem_id.as_int];
|
||||||
|
|
||||||
|
// TODO(ipaljak): retrieve edge weight (default to 1)
|
||||||
|
double weight = 1;
|
||||||
|
|
||||||
|
try {
|
||||||
|
louvain_graph.AddEdge(p.second, next_louv_id, weight);
|
||||||
|
} catch (const std::exception &e) {
|
||||||
|
mgp_vertex_destroy(vertex);
|
||||||
|
mgp_edges_iterator_destroy(edges_iterator);
|
||||||
|
mgp_result_set_error_msg(result, e.what());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
mgp_vertex_destroy(vertex);
|
||||||
|
mgp_edges_iterator_destroy(edges_iterator);
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
algorithms::Louvain(&louvain_graph);
|
||||||
|
} catch (const std::exception &e) {
|
||||||
|
const auto msg = std::string("[Internal error] ") + e.what();
|
||||||
|
mgp_result_set_error_msg(result, msg.c_str());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return node ids and their corresponding communities.
|
||||||
|
for (const auto &p : mem_to_louv_id) {
|
||||||
|
mgp_result_record *record = mgp_result_new_record(result);
|
||||||
|
if (record == nullptr) {
|
||||||
|
mgp_result_set_error_msg(result, "Not enough memory!");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
mgp_value *mem_id_value = mgp_value_make_int(p.first, memory);
|
||||||
|
if (mem_id_value == nullptr) {
|
||||||
|
mgp_result_set_error_msg(result, "Not enough memory!");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
mgp_value *com_value =
|
||||||
|
mgp_value_make_int(louvain_graph.Community(p.second), memory);
|
||||||
|
if (com_value == nullptr) {
|
||||||
|
mgp_value_destroy(mem_id_value);
|
||||||
|
mgp_result_set_error_msg(result, "Not enough memory!");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
int mem_id_inserted =
|
||||||
|
mgp_result_record_insert(record, "id", mem_id_value);
|
||||||
|
int com_inserted =
|
||||||
|
mgp_result_record_insert(record, "community", com_value);
|
||||||
|
|
||||||
|
mgp_value_destroy(mem_id_value);
|
||||||
|
mgp_value_destroy(com_value);
|
||||||
|
|
||||||
|
if (!mem_id_inserted || !com_inserted) {
|
||||||
|
mgp_result_set_error_msg(result, "Not enough memory!");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
extern "C" int mgp_init_module(struct mgp_module *module,
|
extern "C" int mgp_init_module(struct mgp_module *module,
|
||||||
struct mgp_memory *memory) {
|
struct mgp_memory *memory) {
|
||||||
struct mgp_proc *proc =
|
struct mgp_proc *proc =
|
||||||
mgp_module_add_read_procedure(module, "communities", communities);
|
mgp_module_add_read_procedure(module, "communities", communities);
|
||||||
if (!mgp_proc_add_result(proc, "result", mgp_type_string())) return 1;
|
if (!proc) return 1;
|
||||||
|
if (!mgp_proc_add_result(proc, "id", mgp_type_int())) return 1;
|
||||||
|
if (!mgp_proc_add_result(proc, "community", mgp_type_int())) return 1;
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -70,12 +70,12 @@ TEST(Graph, Communities) {
|
|||||||
for (int i = 0; i < 100; ++i) ASSERT_EQ(graph.Community(i), i % 5);
|
for (int i = 0; i < 100; ++i) ASSERT_EQ(graph.Community(i), i % 5);
|
||||||
|
|
||||||
// Try to set communities on non-existing nodes
|
// Try to set communities on non-existing nodes
|
||||||
ASSERT_DEATH({ graph.SetCommunity(100, 2); }, "");
|
EXPECT_THROW({ graph.SetCommunity(100, 2); }, std::out_of_range);
|
||||||
ASSERT_DEATH({ graph.SetCommunity(150, 0); }, "");
|
EXPECT_THROW({ graph.SetCommunity(150, 0); }, std::out_of_range);
|
||||||
|
|
||||||
// Try to get the community of a non-existing node
|
// Try to get the community of a non-existing node
|
||||||
ASSERT_DEATH({ graph.Community(100); }, "");
|
EXPECT_THROW({ graph.Community(100); }, std::out_of_range);
|
||||||
ASSERT_DEATH({ graph.Community(150); }, "");
|
EXPECT_THROW({ graph.Community(150); }, std::out_of_range);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(Graph, CommunityNormalization) {
|
TEST(Graph, CommunityNormalization) {
|
||||||
@ -108,15 +108,15 @@ TEST(Graph, AddEdge) {
|
|||||||
comdata::Graph graph = GenRandomUnweightedGraph(5, 0);
|
comdata::Graph graph = GenRandomUnweightedGraph(5, 0);
|
||||||
|
|
||||||
// Node out of bounds.
|
// Node out of bounds.
|
||||||
ASSERT_DEATH({ graph.AddEdge(1, 5, 7); }, "");
|
EXPECT_THROW({ graph.AddEdge(1, 5, 7); }, std::out_of_range);
|
||||||
|
|
||||||
// Repeated edge
|
// Repeated edge
|
||||||
graph.AddEdge(1, 2, 1);
|
graph.AddEdge(1, 2, 1);
|
||||||
ASSERT_DEATH({ graph.AddEdge(1, 2, 7); }, "");
|
EXPECT_THROW({ graph.AddEdge(1, 2, 7); }, std::invalid_argument);
|
||||||
|
|
||||||
// Non-positive edge weight
|
// Non-positive edge weight
|
||||||
ASSERT_DEATH({ graph.AddEdge(2, 3, -7); }, "");
|
EXPECT_THROW({ graph.AddEdge(2, 3, -7); }, std::out_of_range);
|
||||||
ASSERT_DEATH({ graph.AddEdge(3, 4, 0); }, "");
|
EXPECT_THROW({ graph.AddEdge(3, 4, 0); }, std::out_of_range);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(Graph, Degrees) {
|
TEST(Graph, Degrees) {
|
||||||
@ -183,8 +183,8 @@ TEST(Graph, Degrees) {
|
|||||||
ASSERT_TRUE(DegreeCheck(graph, deg));
|
ASSERT_TRUE(DegreeCheck(graph, deg));
|
||||||
|
|
||||||
// Try to get degree of non-existing nodes
|
// Try to get degree of non-existing nodes
|
||||||
ASSERT_DEATH({ graph.Degree(5); }, "");
|
EXPECT_THROW({ graph.Degree(5); }, std::out_of_range);
|
||||||
ASSERT_DEATH({ graph.Degree(100); }, "");
|
EXPECT_THROW({ graph.Degree(100); }, std::out_of_range);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(Graph, Weights) {
|
TEST(Graph, Weights) {
|
||||||
@ -256,8 +256,8 @@ TEST(Graph, Weights) {
|
|||||||
EXPECT_NEAR(graph.TotalWeight(), 5.5, 1e-6);
|
EXPECT_NEAR(graph.TotalWeight(), 5.5, 1e-6);
|
||||||
|
|
||||||
// Try to get incident weight of non-existing node
|
// Try to get incident weight of non-existing node
|
||||||
ASSERT_DEATH({ graph.IncidentWeight(5); }, "");
|
EXPECT_THROW({ graph.IncidentWeight(5); }, std::out_of_range);
|
||||||
ASSERT_DEATH({ graph.IncidentWeight(100); }, "");
|
EXPECT_THROW({ graph.IncidentWeight(100); }, std::out_of_range);
|
||||||
}
|
}
|
||||||
|
|
||||||
TEST(Graph, Modularity) {
|
TEST(Graph, Modularity) {
|
||||||
|
Loading…
Reference in New Issue
Block a user