Implement Louvain community detection algorithm
Summary: Louvain implementation according to original paper. Reviewers: dsantl Reviewed By: dsantl Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D2556
This commit is contained in:
parent
29590f1112
commit
ae0be9032e
18
query_modules/louvain/src/algorithms/algorithms.hpp
Normal file
18
query_modules/louvain/src/algorithms/algorithms.hpp
Normal file
@ -0,0 +1,18 @@
|
|||||||
|
/// @file
|
||||||
|
///
|
||||||
|
/// The file contains function declarations of several community-detection
|
||||||
|
/// graph algorithms.
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include "data_structures/graph.hpp"
|
||||||
|
|
||||||
|
namespace algorithms {
|
||||||
|
/// Detects communities of an unidrected, weighted graph using the Louvain
|
||||||
|
/// algorithm. The algorithm attempts to maximze the modularity of a weighted
|
||||||
|
/// graph.
|
||||||
|
///
|
||||||
|
/// @param G pointer to an undirected, weighted graph which may contain
|
||||||
|
/// self-loops.
|
||||||
|
void Louvain(comdata::Graph *G);
|
||||||
|
} // namespace algorithms
|
89
query_modules/louvain/src/algorithms/louvain.cpp
Normal file
89
query_modules/louvain/src/algorithms/louvain.cpp
Normal file
@ -0,0 +1,89 @@
|
|||||||
|
#include "algorithms/algorithms.hpp"
|
||||||
|
|
||||||
|
#include <algorithm>
|
||||||
|
#include <map>
|
||||||
|
#include <random>
|
||||||
|
#include <unordered_map>
|
||||||
|
|
||||||
|
#include <glog/logging.h>
|
||||||
|
|
||||||
|
namespace {
|
||||||
|
|
||||||
|
void OptimizeLocally(comdata::Graph *G) {
|
||||||
|
// We will consider local optimizations uniformly at random.
|
||||||
|
std::random_device rd;
|
||||||
|
std::mt19937 g(rd());
|
||||||
|
std::vector<uint32_t> p(G->Size());
|
||||||
|
std::iota(p.begin(), p.end(), 0);
|
||||||
|
std::shuffle(p.begin(), p.end(), g);
|
||||||
|
|
||||||
|
double total_w = G->TotalWeight();
|
||||||
|
bool stable = false;
|
||||||
|
while (!stable) {
|
||||||
|
stable = true;
|
||||||
|
for (uint32_t node_id : p) {
|
||||||
|
std::unordered_map<uint32_t, double> c_contrib;
|
||||||
|
c_contrib[G->Community(node_id)] = 0;
|
||||||
|
for (const auto &neigh : G->Neighbours(node_id)) {
|
||||||
|
uint32_t nxt_id = neigh.dest;
|
||||||
|
double weight = neigh.weight;
|
||||||
|
double contrib = weight - G->IncidentWeight(node_id) *
|
||||||
|
G->IncidentWeight(nxt_id) / (2 * total_w);
|
||||||
|
c_contrib[G->Community(nxt_id)] += contrib;
|
||||||
|
}
|
||||||
|
|
||||||
|
auto best_c = std::max_element(c_contrib.begin(), c_contrib.end(),
|
||||||
|
[](const std::pair<uint32_t, double> &p1,
|
||||||
|
const std::pair<uint32_t, double> &p2) {
|
||||||
|
return p1.second < p2.second;
|
||||||
|
});
|
||||||
|
|
||||||
|
if (best_c->second - c_contrib[G->Community(node_id)] > 0) {
|
||||||
|
G->SetCommunity(node_id, best_c->first);
|
||||||
|
stable = false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
} // anonymous namespace
|
||||||
|
|
||||||
|
namespace algorithms {
|
||||||
|
|
||||||
|
void Louvain(comdata::Graph *G) {
|
||||||
|
OptimizeLocally(G);
|
||||||
|
|
||||||
|
// Collapse the locally optimized graph.
|
||||||
|
uint32_t collapsed_nodes = G->NormalizeCommunities();
|
||||||
|
if (collapsed_nodes == G->Size()) return;
|
||||||
|
comdata::Graph collapsed_G(collapsed_nodes);
|
||||||
|
std::map<std::pair<uint32_t, uint32_t>, double> collapsed_edges;
|
||||||
|
|
||||||
|
for (uint32_t node_id = 0; node_id < G->Size(); ++node_id) {
|
||||||
|
std::unordered_map<uint32_t, double> edges;
|
||||||
|
for (const auto &neigh : G->Neighbours(node_id)) {
|
||||||
|
uint32_t nxt_id = neigh.dest;
|
||||||
|
double weight = neigh.weight;
|
||||||
|
if (G->Community(nxt_id) < G->Community(node_id)) continue;
|
||||||
|
edges[G->Community(nxt_id)] += weight;
|
||||||
|
}
|
||||||
|
for (const auto &neigh : edges) {
|
||||||
|
uint32_t a = std::min(G->Community(node_id), neigh.first);
|
||||||
|
uint32_t b = std::max(G->Community(node_id), neigh.first);
|
||||||
|
collapsed_edges[{a, b}] += neigh.second;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
for (const auto &p : collapsed_edges)
|
||||||
|
collapsed_G.AddEdge(p.first.first, p.first.second, p.second);
|
||||||
|
|
||||||
|
// Repeat until no local optimizations can be found.
|
||||||
|
Louvain(&collapsed_G);
|
||||||
|
|
||||||
|
// Propagate results from collapsed graph.
|
||||||
|
for (uint32_t node_id = 0; node_id < G->Size(); ++node_id)
|
||||||
|
G->SetCommunity(node_id, collapsed_G.Community(G->Community(node_id)));
|
||||||
|
G->NormalizeCommunities();
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace algorithms
|
106
query_modules/louvain/src/data_structures/graph.cpp
Normal file
106
query_modules/louvain/src/data_structures/graph.cpp
Normal file
@ -0,0 +1,106 @@
|
|||||||
|
#include "data_structures/graph.hpp"
|
||||||
|
|
||||||
|
#include <numeric>
|
||||||
|
#include <unordered_map>
|
||||||
|
#include <unordered_set>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
#include <glog/logging.h>
|
||||||
|
|
||||||
|
namespace comdata {
|
||||||
|
|
||||||
|
Graph::Graph(uint32_t n_nodes) : n_nodes_(n_nodes), total_w_(0) {
|
||||||
|
adj_list_.resize(n_nodes, {});
|
||||||
|
inc_w_.resize(n_nodes, 0);
|
||||||
|
|
||||||
|
// each node starts as its own separate community.
|
||||||
|
community_.resize(n_nodes);
|
||||||
|
std::iota(community_.begin(), community_.end(), 0);
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t Graph::Size() const {
|
||||||
|
return n_nodes_;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t Graph::Community(uint32_t node) const {
|
||||||
|
CHECK(node < n_nodes_) << "Node index out of range";
|
||||||
|
return community_[node];
|
||||||
|
}
|
||||||
|
|
||||||
|
void Graph::SetCommunity(uint32_t node, uint32_t c) {
|
||||||
|
CHECK(node < n_nodes_) << "Node index out of range";
|
||||||
|
community_[node] = c;
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t Graph::NormalizeCommunities() {
|
||||||
|
std::set<uint32_t> c_id(community_.begin(), community_.end());
|
||||||
|
std::unordered_map<uint32_t, uint32_t> cmap;
|
||||||
|
uint32_t id = 0;
|
||||||
|
for (uint32_t c : c_id) {
|
||||||
|
cmap[c] = id;
|
||||||
|
++id;
|
||||||
|
}
|
||||||
|
for (uint32_t node_id = 0; node_id < n_nodes_; ++node_id)
|
||||||
|
community_[node_id] = cmap[community_[node_id]];
|
||||||
|
return id;
|
||||||
|
}
|
||||||
|
|
||||||
|
void Graph::AddEdge(uint32_t node1, uint32_t node2, double weight) {
|
||||||
|
CHECK(node1 < n_nodes_) << "Node index out of range";
|
||||||
|
CHECK(node2 < n_nodes_) << "Node index out of range";
|
||||||
|
CHECK(weight > 0) << "Weights must be positive";
|
||||||
|
CHECK(edges_.find({node1, node2}) == edges_.end()) << "Edge already exists";
|
||||||
|
|
||||||
|
edges_.emplace(node1, node2);
|
||||||
|
edges_.emplace(node2, node1);
|
||||||
|
|
||||||
|
total_w_ += weight;
|
||||||
|
|
||||||
|
adj_list_[node1].emplace_back(node2, weight);
|
||||||
|
inc_w_[node1] += weight;
|
||||||
|
|
||||||
|
if (node1 != node2) {
|
||||||
|
adj_list_[node2].emplace_back(node1, weight);
|
||||||
|
inc_w_[node2] += weight;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
uint32_t Graph::Degree(uint32_t node) const {
|
||||||
|
CHECK(node < n_nodes_) << "Node index out of range";
|
||||||
|
return static_cast<uint32_t>(adj_list_[node].size());
|
||||||
|
}
|
||||||
|
|
||||||
|
double Graph::IncidentWeight(uint32_t node) const {
|
||||||
|
CHECK(node < n_nodes_) << "Node index out of range";
|
||||||
|
return inc_w_[node];
|
||||||
|
}
|
||||||
|
|
||||||
|
double Graph::TotalWeight() const {
|
||||||
|
return total_w_;
|
||||||
|
}
|
||||||
|
|
||||||
|
double Graph::Modularity() const {
|
||||||
|
double ret = 0;
|
||||||
|
// Since all weights should be positive, this implies that our graph has
|
||||||
|
// no edges.
|
||||||
|
if (total_w_ == 0)
|
||||||
|
return 0;
|
||||||
|
|
||||||
|
for (uint32_t i = 0; i < n_nodes_; ++i) {
|
||||||
|
for (const auto &neigh : adj_list_[i]) {
|
||||||
|
uint32_t j = neigh.dest;
|
||||||
|
double w = neigh.weight;
|
||||||
|
if (Community(i) != Community(j)) continue;
|
||||||
|
ret += w - (IncidentWeight(i) * IncidentWeight(j) / (2.0 * total_w_));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
ret /= 2 * total_w_;
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
|
||||||
|
const std::vector<Neighbour>& Graph::Neighbours(uint32_t node) const {
|
||||||
|
CHECK(node < n_nodes_) << "Node index out of range";
|
||||||
|
return adj_list_[node];
|
||||||
|
}
|
||||||
|
|
||||||
|
} // namespace comdata
|
109
query_modules/louvain/src/data_structures/graph.hpp
Normal file
109
query_modules/louvain/src/data_structures/graph.hpp
Normal file
@ -0,0 +1,109 @@
|
|||||||
|
/// @file graph.hpp
|
||||||
|
|
||||||
|
#pragma once
|
||||||
|
|
||||||
|
#include <cstdint>
|
||||||
|
#include <set>
|
||||||
|
#include <vector>
|
||||||
|
|
||||||
|
namespace comdata {
|
||||||
|
|
||||||
|
struct Neighbour {
|
||||||
|
uint32_t dest;
|
||||||
|
double weight;
|
||||||
|
Neighbour(uint32_t d, double w) : dest(d), weight(w) {}
|
||||||
|
};
|
||||||
|
|
||||||
|
/// Class which models a weighted, undirected graph with necessary
|
||||||
|
/// functionalities for community detection algorithms.
|
||||||
|
class Graph {
|
||||||
|
public:
|
||||||
|
/// Constructs a new graph with a given number of nodes and no edges between
|
||||||
|
/// them.
|
||||||
|
///
|
||||||
|
/// At the moment, the implementation assumes (and enforces) that all nodes
|
||||||
|
/// are indexed from 0 to n_nodes - 1. This will be changed in the final
|
||||||
|
/// implementation. - TODO(ipaljak)
|
||||||
|
///
|
||||||
|
/// @param n_nodes Number of nodes in the graph.
|
||||||
|
explicit Graph(uint32_t n_nodes);
|
||||||
|
|
||||||
|
/// @return number of nodes in the graph.
|
||||||
|
uint32_t Size() const;
|
||||||
|
|
||||||
|
/// Adds a bidirectional, weighted edge to the graph between the given
|
||||||
|
/// nodes. If both given nodes are the same, the method inserts a weighted
|
||||||
|
/// self-loop.
|
||||||
|
///
|
||||||
|
/// There should be no edges between the given nodes when before invoking
|
||||||
|
/// this method.
|
||||||
|
///
|
||||||
|
/// @param node1 index of an incident node.
|
||||||
|
/// @param node2 index of an incident node.
|
||||||
|
/// @param weight real value which represents the weight of the edge.
|
||||||
|
void AddEdge(uint32_t node1, uint32_t node2, double weight);
|
||||||
|
|
||||||
|
/// @param node index of node.
|
||||||
|
/// @return community where the node belongs to.
|
||||||
|
uint32_t Community(uint32_t node) const;
|
||||||
|
|
||||||
|
/// Adds a given node to a given community.
|
||||||
|
///
|
||||||
|
/// @param node index of node.
|
||||||
|
/// @param c community where the given node should go in.
|
||||||
|
void SetCommunity(uint32_t node, uint32_t c);
|
||||||
|
|
||||||
|
/// Normalizes the values of communities. More precisely, after invoking this
|
||||||
|
/// method communities will be indexed by successive integers starting from 0.
|
||||||
|
///
|
||||||
|
/// Note: this method is computationally expensive and takes O(|V|)
|
||||||
|
/// time, i.e., it traverses all nodes in the graph.
|
||||||
|
///
|
||||||
|
/// @return number of communities in the graph
|
||||||
|
uint32_t NormalizeCommunities();
|
||||||
|
|
||||||
|
/// Returns the number of incident edges to a given node. Self-loops
|
||||||
|
/// contribute a single edge to the degree.
|
||||||
|
///
|
||||||
|
/// @param node index of node.
|
||||||
|
/// @return degree of given node.
|
||||||
|
uint32_t Degree(uint32_t node) const;
|
||||||
|
|
||||||
|
/// Returns the total weight of incident edges to a given node. Weight
|
||||||
|
/// of a self loop contributes once to the total sum.
|
||||||
|
///
|
||||||
|
/// @param node index of node.
|
||||||
|
/// @return total incident weight of a given node.
|
||||||
|
double IncidentWeight(uint32_t node) const;
|
||||||
|
|
||||||
|
/// @return total weight of all edges in a graph.
|
||||||
|
double TotalWeight() const;
|
||||||
|
|
||||||
|
/// Calculates the modularity of the graph which is defined as a real value
|
||||||
|
/// between -1 and 1 that measures the density of links inside communities
|
||||||
|
/// compared to links between communities.
|
||||||
|
///
|
||||||
|
/// Note: this method is computationally expensive and takes O(|V| + |E|)
|
||||||
|
/// time, i.e., it traverses the entire graph.
|
||||||
|
///
|
||||||
|
/// @return modularity of the graph.
|
||||||
|
double Modularity() const;
|
||||||
|
|
||||||
|
/// Returns nodes adjacent to a given node.
|
||||||
|
///
|
||||||
|
/// @param node index of node.
|
||||||
|
/// @return list of neighbouring nodes.
|
||||||
|
const std::vector<Neighbour>& Neighbours(uint32_t node) const;
|
||||||
|
|
||||||
|
private:
|
||||||
|
uint32_t n_nodes_;
|
||||||
|
double total_w_;
|
||||||
|
|
||||||
|
std::vector<std::vector<Neighbour>> adj_list_;
|
||||||
|
std::set<std::pair<uint32_t, uint32_t>> edges_;
|
||||||
|
|
||||||
|
std::vector<double> inc_w_;
|
||||||
|
std::vector<uint32_t> community_;
|
||||||
|
};
|
||||||
|
|
||||||
|
} // namespace comdata
|
Loading…
Reference in New Issue
Block a user