Fix modularity calculation bug in Louvain query module

Reviewers: dsantl

Reviewed By: dsantl

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D2607
This commit is contained in:
Ivan Paljak 2019-12-18 15:14:47 +01:00
parent ec67e71d39
commit 591eadad20
3 changed files with 88 additions and 16 deletions

View File

@ -15,29 +15,102 @@ void OptimizeLocally(comdata::Graph *graph) {
std::iota(p.begin(), p.end(), 0);
std::shuffle(p.begin(), p.end(), g);
double total_w = graph->TotalWeight();
// Modularity of a graph can be expressed as:
//
// Q = 1 / (2m) * sum_over_pairs_of_nodes[(Aij - ki * kj / 2m) * delta(ci, cj)]
//
// where m is the sum of all weights in the graph,
// Aij is the weight on edge that connects i and j (i=j for a self-loop)
// ki is the sum of weights incident to node i
// ci is the community of node i
// delta(a, b) is the Kronecker delta function.
//
// With some simple algebraic manipulations, we can transform the formula into:
//
// Q = sum_over_components[M * ((sum_over_pairs(Aij + M * ki * kj)))] =
// = sum_over_components[M * (sum_over_pairs(Aij) + M * sum_over_nodes^2(ki))] =
// = sum_over_components[M * (w_contrib(ci) + M * k_contrib^2(ci))]
//
// where M = 1 / (2m)
//
// Therefore, we could store for each community the following:
// * Weight contribution (w_contrib)
// * Weighted degree contribution (k_contrib)
//
// This allows us to efficiently remove a node from one community and insert
// it into a community of its neighbour without the need to recalculate
// modularity from scratch.
std::unordered_map<uint32_t, double> w_contrib;
std::unordered_map<uint32_t, double> k_contrib;
for (uint32_t node_id = 0; node_id < graph->Size(); ++node_id) {
k_contrib[graph->Community(node_id)] += graph->IncidentWeight(node_id);
for (const auto &neigh : graph->Neighbours(node_id)) {
uint32_t nxt_id = neigh.dest;
double w = neigh.weight;
if (graph->Community(node_id) == graph->Community(nxt_id))
w_contrib[graph->Community(node_id)] += w;
}
}
bool stable = false;
double total_w = graph->TotalWeight();
while (!stable) {
stable = true;
for (uint32_t node_id : p) {
std::unordered_map<uint32_t, double> c_contrib;
c_contrib[graph->Community(node_id)] = 0;
std::unordered_map<uint32_t, double> sum_w;
double self_loop = 0;
sum_w[graph->Community(node_id)] = 0;
for (const auto &neigh : graph->Neighbours(node_id)) {
uint32_t nxt_id = neigh.dest;
double weight = neigh.weight;
double contrib = weight - graph->IncidentWeight(node_id) *
graph->IncidentWeight(nxt_id) / (2 * total_w);
c_contrib[graph->Community(nxt_id)] += contrib;
if (nxt_id == node_id) {
self_loop += weight;
continue;
}
sum_w[graph->Community(nxt_id)] += weight;
}
auto best_c = std::max_element(c_contrib.begin(), c_contrib.end(),
[](const std::pair<uint32_t, double> &p1,
const std::pair<uint32_t, double> &p2) {
return p1.second < p2.second;
});
uint32_t my_c = graph->Community(node_id);
if (best_c->second - c_contrib[graph->Community(node_id)] > 0) {
graph->SetCommunity(node_id, best_c->first);
uint32_t best_c = my_c;
double best_dq = 0;
for (const auto &p : sum_w) {
if (p.first == my_c) continue;
uint32_t nxt_c = p.first;
double dq = 0;
// contributions before swap (dq = d_after - d_before)
for (uint32_t c : {my_c, nxt_c})
dq -= w_contrib[c] - k_contrib[c] * k_contrib[c] / (2.0 * total_w);
// leave the current community
dq += (w_contrib[my_c] - 2.0 * sum_w[my_c] - self_loop) -
(k_contrib[my_c] - graph->IncidentWeight(node_id)) *
(k_contrib[my_c] - graph->IncidentWeight(node_id)) /
(2.0 * total_w);
// join a new community
dq += (w_contrib[nxt_c] + 2.0 * sum_w[nxt_c] + self_loop) -
(k_contrib[nxt_c] + graph->IncidentWeight(node_id)) *
(k_contrib[nxt_c] + graph->IncidentWeight(node_id)) /
(2.0 * total_w);
if (dq > best_dq) {
best_dq = dq;
best_c = nxt_c;
}
}
if (best_c != my_c) {
graph->SetCommunity(node_id, best_c);
w_contrib[my_c] -= 2.0 * sum_w[my_c] + self_loop;
k_contrib[my_c] -= graph->IncidentWeight(node_id);
w_contrib[best_c] += 2.0 * sum_w[best_c] + self_loop;
k_contrib[best_c] += graph->IncidentWeight(node_id);
stable = false;
}
}

View File

@ -82,8 +82,7 @@ double Graph::Modularity() const {
std::unordered_map<uint32_t, double> degree_c;
for (uint32_t i = 0; i < n_nodes_; ++i) {
degree_c[Community(i)] +=
static_cast<double>(IncidentWeight(i));
degree_c[Community(i)] += IncidentWeight(i);
for (const auto &neigh : adj_list_[i]) {
uint32_t j = neigh.dest;
double w = neigh.weight;

View File

@ -22,7 +22,7 @@ int main() {
algorithms::Louvain(&graph);
for (int i = 0; i < n; ++i)
std::cout << i << graph.Community(i) << "\n";
std::cout << i << " " << graph.Community(i) << "\n";
std::cout << graph.Modularity() << "\n";
return 0;
}