Implement Louvain community detection algorithm
Summary: Louvain implementation according to original paper. Reviewers: dsantl Reviewed By: dsantl Subscribers: pullbot Differential Revision: https://phabricator.memgraph.io/D2556
This commit is contained in:
parent
29590f1112
commit
ae0be9032e
18
query_modules/louvain/src/algorithms/algorithms.hpp
Normal file
18
query_modules/louvain/src/algorithms/algorithms.hpp
Normal file
@ -0,0 +1,18 @@
|
||||
/// @file
|
||||
///
|
||||
/// The file contains function declarations of several community-detection
|
||||
/// graph algorithms.
|
||||
|
||||
#pragma once
|
||||
|
||||
#include "data_structures/graph.hpp"
|
||||
|
||||
namespace algorithms {
|
||||
/// Detects communities of an unidrected, weighted graph using the Louvain
|
||||
/// algorithm. The algorithm attempts to maximze the modularity of a weighted
|
||||
/// graph.
|
||||
///
|
||||
/// @param G pointer to an undirected, weighted graph which may contain
|
||||
/// self-loops.
|
||||
void Louvain(comdata::Graph *G);
|
||||
} // namespace algorithms
|
89
query_modules/louvain/src/algorithms/louvain.cpp
Normal file
89
query_modules/louvain/src/algorithms/louvain.cpp
Normal file
@ -0,0 +1,89 @@
|
||||
#include "algorithms/algorithms.hpp"
|
||||
|
||||
#include <algorithm>
|
||||
#include <map>
|
||||
#include <random>
|
||||
#include <unordered_map>
|
||||
|
||||
#include <glog/logging.h>
|
||||
|
||||
namespace {
|
||||
|
||||
void OptimizeLocally(comdata::Graph *G) {
|
||||
// We will consider local optimizations uniformly at random.
|
||||
std::random_device rd;
|
||||
std::mt19937 g(rd());
|
||||
std::vector<uint32_t> p(G->Size());
|
||||
std::iota(p.begin(), p.end(), 0);
|
||||
std::shuffle(p.begin(), p.end(), g);
|
||||
|
||||
double total_w = G->TotalWeight();
|
||||
bool stable = false;
|
||||
while (!stable) {
|
||||
stable = true;
|
||||
for (uint32_t node_id : p) {
|
||||
std::unordered_map<uint32_t, double> c_contrib;
|
||||
c_contrib[G->Community(node_id)] = 0;
|
||||
for (const auto &neigh : G->Neighbours(node_id)) {
|
||||
uint32_t nxt_id = neigh.dest;
|
||||
double weight = neigh.weight;
|
||||
double contrib = weight - G->IncidentWeight(node_id) *
|
||||
G->IncidentWeight(nxt_id) / (2 * total_w);
|
||||
c_contrib[G->Community(nxt_id)] += contrib;
|
||||
}
|
||||
|
||||
auto best_c = std::max_element(c_contrib.begin(), c_contrib.end(),
|
||||
[](const std::pair<uint32_t, double> &p1,
|
||||
const std::pair<uint32_t, double> &p2) {
|
||||
return p1.second < p2.second;
|
||||
});
|
||||
|
||||
if (best_c->second - c_contrib[G->Community(node_id)] > 0) {
|
||||
G->SetCommunity(node_id, best_c->first);
|
||||
stable = false;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
} // anonymous namespace
|
||||
|
||||
namespace algorithms {
|
||||
|
||||
void Louvain(comdata::Graph *G) {
|
||||
OptimizeLocally(G);
|
||||
|
||||
// Collapse the locally optimized graph.
|
||||
uint32_t collapsed_nodes = G->NormalizeCommunities();
|
||||
if (collapsed_nodes == G->Size()) return;
|
||||
comdata::Graph collapsed_G(collapsed_nodes);
|
||||
std::map<std::pair<uint32_t, uint32_t>, double> collapsed_edges;
|
||||
|
||||
for (uint32_t node_id = 0; node_id < G->Size(); ++node_id) {
|
||||
std::unordered_map<uint32_t, double> edges;
|
||||
for (const auto &neigh : G->Neighbours(node_id)) {
|
||||
uint32_t nxt_id = neigh.dest;
|
||||
double weight = neigh.weight;
|
||||
if (G->Community(nxt_id) < G->Community(node_id)) continue;
|
||||
edges[G->Community(nxt_id)] += weight;
|
||||
}
|
||||
for (const auto &neigh : edges) {
|
||||
uint32_t a = std::min(G->Community(node_id), neigh.first);
|
||||
uint32_t b = std::max(G->Community(node_id), neigh.first);
|
||||
collapsed_edges[{a, b}] += neigh.second;
|
||||
}
|
||||
}
|
||||
|
||||
for (const auto &p : collapsed_edges)
|
||||
collapsed_G.AddEdge(p.first.first, p.first.second, p.second);
|
||||
|
||||
// Repeat until no local optimizations can be found.
|
||||
Louvain(&collapsed_G);
|
||||
|
||||
// Propagate results from collapsed graph.
|
||||
for (uint32_t node_id = 0; node_id < G->Size(); ++node_id)
|
||||
G->SetCommunity(node_id, collapsed_G.Community(G->Community(node_id)));
|
||||
G->NormalizeCommunities();
|
||||
}
|
||||
|
||||
} // namespace algorithms
|
106
query_modules/louvain/src/data_structures/graph.cpp
Normal file
106
query_modules/louvain/src/data_structures/graph.cpp
Normal file
@ -0,0 +1,106 @@
|
||||
#include "data_structures/graph.hpp"
|
||||
|
||||
#include <numeric>
|
||||
#include <unordered_map>
|
||||
#include <unordered_set>
|
||||
#include <vector>
|
||||
|
||||
#include <glog/logging.h>
|
||||
|
||||
namespace comdata {
|
||||
|
||||
Graph::Graph(uint32_t n_nodes) : n_nodes_(n_nodes), total_w_(0) {
|
||||
adj_list_.resize(n_nodes, {});
|
||||
inc_w_.resize(n_nodes, 0);
|
||||
|
||||
// each node starts as its own separate community.
|
||||
community_.resize(n_nodes);
|
||||
std::iota(community_.begin(), community_.end(), 0);
|
||||
}
|
||||
|
||||
uint32_t Graph::Size() const {
|
||||
return n_nodes_;
|
||||
}
|
||||
|
||||
uint32_t Graph::Community(uint32_t node) const {
|
||||
CHECK(node < n_nodes_) << "Node index out of range";
|
||||
return community_[node];
|
||||
}
|
||||
|
||||
void Graph::SetCommunity(uint32_t node, uint32_t c) {
|
||||
CHECK(node < n_nodes_) << "Node index out of range";
|
||||
community_[node] = c;
|
||||
}
|
||||
|
||||
uint32_t Graph::NormalizeCommunities() {
|
||||
std::set<uint32_t> c_id(community_.begin(), community_.end());
|
||||
std::unordered_map<uint32_t, uint32_t> cmap;
|
||||
uint32_t id = 0;
|
||||
for (uint32_t c : c_id) {
|
||||
cmap[c] = id;
|
||||
++id;
|
||||
}
|
||||
for (uint32_t node_id = 0; node_id < n_nodes_; ++node_id)
|
||||
community_[node_id] = cmap[community_[node_id]];
|
||||
return id;
|
||||
}
|
||||
|
||||
void Graph::AddEdge(uint32_t node1, uint32_t node2, double weight) {
|
||||
CHECK(node1 < n_nodes_) << "Node index out of range";
|
||||
CHECK(node2 < n_nodes_) << "Node index out of range";
|
||||
CHECK(weight > 0) << "Weights must be positive";
|
||||
CHECK(edges_.find({node1, node2}) == edges_.end()) << "Edge already exists";
|
||||
|
||||
edges_.emplace(node1, node2);
|
||||
edges_.emplace(node2, node1);
|
||||
|
||||
total_w_ += weight;
|
||||
|
||||
adj_list_[node1].emplace_back(node2, weight);
|
||||
inc_w_[node1] += weight;
|
||||
|
||||
if (node1 != node2) {
|
||||
adj_list_[node2].emplace_back(node1, weight);
|
||||
inc_w_[node2] += weight;
|
||||
}
|
||||
}
|
||||
|
||||
uint32_t Graph::Degree(uint32_t node) const {
|
||||
CHECK(node < n_nodes_) << "Node index out of range";
|
||||
return static_cast<uint32_t>(adj_list_[node].size());
|
||||
}
|
||||
|
||||
double Graph::IncidentWeight(uint32_t node) const {
|
||||
CHECK(node < n_nodes_) << "Node index out of range";
|
||||
return inc_w_[node];
|
||||
}
|
||||
|
||||
double Graph::TotalWeight() const {
|
||||
return total_w_;
|
||||
}
|
||||
|
||||
double Graph::Modularity() const {
|
||||
double ret = 0;
|
||||
// Since all weights should be positive, this implies that our graph has
|
||||
// no edges.
|
||||
if (total_w_ == 0)
|
||||
return 0;
|
||||
|
||||
for (uint32_t i = 0; i < n_nodes_; ++i) {
|
||||
for (const auto &neigh : adj_list_[i]) {
|
||||
uint32_t j = neigh.dest;
|
||||
double w = neigh.weight;
|
||||
if (Community(i) != Community(j)) continue;
|
||||
ret += w - (IncidentWeight(i) * IncidentWeight(j) / (2.0 * total_w_));
|
||||
}
|
||||
}
|
||||
ret /= 2 * total_w_;
|
||||
return ret;
|
||||
}
|
||||
|
||||
const std::vector<Neighbour>& Graph::Neighbours(uint32_t node) const {
|
||||
CHECK(node < n_nodes_) << "Node index out of range";
|
||||
return adj_list_[node];
|
||||
}
|
||||
|
||||
} // namespace comdata
|
109
query_modules/louvain/src/data_structures/graph.hpp
Normal file
109
query_modules/louvain/src/data_structures/graph.hpp
Normal file
@ -0,0 +1,109 @@
|
||||
/// @file graph.hpp
|
||||
|
||||
#pragma once
|
||||
|
||||
#include <cstdint>
|
||||
#include <set>
|
||||
#include <vector>
|
||||
|
||||
namespace comdata {
|
||||
|
||||
struct Neighbour {
|
||||
uint32_t dest;
|
||||
double weight;
|
||||
Neighbour(uint32_t d, double w) : dest(d), weight(w) {}
|
||||
};
|
||||
|
||||
/// Class which models a weighted, undirected graph with necessary
|
||||
/// functionalities for community detection algorithms.
|
||||
class Graph {
|
||||
public:
|
||||
/// Constructs a new graph with a given number of nodes and no edges between
|
||||
/// them.
|
||||
///
|
||||
/// At the moment, the implementation assumes (and enforces) that all nodes
|
||||
/// are indexed from 0 to n_nodes - 1. This will be changed in the final
|
||||
/// implementation. - TODO(ipaljak)
|
||||
///
|
||||
/// @param n_nodes Number of nodes in the graph.
|
||||
explicit Graph(uint32_t n_nodes);
|
||||
|
||||
/// @return number of nodes in the graph.
|
||||
uint32_t Size() const;
|
||||
|
||||
/// Adds a bidirectional, weighted edge to the graph between the given
|
||||
/// nodes. If both given nodes are the same, the method inserts a weighted
|
||||
/// self-loop.
|
||||
///
|
||||
/// There should be no edges between the given nodes when before invoking
|
||||
/// this method.
|
||||
///
|
||||
/// @param node1 index of an incident node.
|
||||
/// @param node2 index of an incident node.
|
||||
/// @param weight real value which represents the weight of the edge.
|
||||
void AddEdge(uint32_t node1, uint32_t node2, double weight);
|
||||
|
||||
/// @param node index of node.
|
||||
/// @return community where the node belongs to.
|
||||
uint32_t Community(uint32_t node) const;
|
||||
|
||||
/// Adds a given node to a given community.
|
||||
///
|
||||
/// @param node index of node.
|
||||
/// @param c community where the given node should go in.
|
||||
void SetCommunity(uint32_t node, uint32_t c);
|
||||
|
||||
/// Normalizes the values of communities. More precisely, after invoking this
|
||||
/// method communities will be indexed by successive integers starting from 0.
|
||||
///
|
||||
/// Note: this method is computationally expensive and takes O(|V|)
|
||||
/// time, i.e., it traverses all nodes in the graph.
|
||||
///
|
||||
/// @return number of communities in the graph
|
||||
uint32_t NormalizeCommunities();
|
||||
|
||||
/// Returns the number of incident edges to a given node. Self-loops
|
||||
/// contribute a single edge to the degree.
|
||||
///
|
||||
/// @param node index of node.
|
||||
/// @return degree of given node.
|
||||
uint32_t Degree(uint32_t node) const;
|
||||
|
||||
/// Returns the total weight of incident edges to a given node. Weight
|
||||
/// of a self loop contributes once to the total sum.
|
||||
///
|
||||
/// @param node index of node.
|
||||
/// @return total incident weight of a given node.
|
||||
double IncidentWeight(uint32_t node) const;
|
||||
|
||||
/// @return total weight of all edges in a graph.
|
||||
double TotalWeight() const;
|
||||
|
||||
/// Calculates the modularity of the graph which is defined as a real value
|
||||
/// between -1 and 1 that measures the density of links inside communities
|
||||
/// compared to links between communities.
|
||||
///
|
||||
/// Note: this method is computationally expensive and takes O(|V| + |E|)
|
||||
/// time, i.e., it traverses the entire graph.
|
||||
///
|
||||
/// @return modularity of the graph.
|
||||
double Modularity() const;
|
||||
|
||||
/// Returns nodes adjacent to a given node.
|
||||
///
|
||||
/// @param node index of node.
|
||||
/// @return list of neighbouring nodes.
|
||||
const std::vector<Neighbour>& Neighbours(uint32_t node) const;
|
||||
|
||||
private:
|
||||
uint32_t n_nodes_;
|
||||
double total_w_;
|
||||
|
||||
std::vector<std::vector<Neighbour>> adj_list_;
|
||||
std::set<std::pair<uint32_t, uint32_t>> edges_;
|
||||
|
||||
std::vector<double> inc_w_;
|
||||
std::vector<uint32_t> community_;
|
||||
};
|
||||
|
||||
} // namespace comdata
|
Loading…
Reference in New Issue
Block a user