diff --git a/query_modules/louvain/src/algorithms/louvain.cpp b/query_modules/louvain/src/algorithms/louvain.cpp index f9b416d08..2be08385c 100644 --- a/query_modules/louvain/src/algorithms/louvain.cpp +++ b/query_modules/louvain/src/algorithms/louvain.cpp @@ -15,29 +15,102 @@ void OptimizeLocally(comdata::Graph *graph) { std::iota(p.begin(), p.end(), 0); std::shuffle(p.begin(), p.end(), g); - double total_w = graph->TotalWeight(); + // Modularity of a graph can be expressed as: + // + // Q = 1 / (2m) * sum_over_pairs_of_nodes[(Aij - ki * kj / 2m) * delta(ci, cj)] + // + // where m is the sum of all weights in the graph, + // Aij is the weight on edge that connects i and j (i=j for a self-loop) + // ki is the sum of weights incident to node i + // ci is the community of node i + // delta(a, b) is the Kronecker delta function. + // + // With some simple algebraic manipulations, we can transform the formula into: + // + // Q = sum_over_components[M * ((sum_over_pairs(Aij + M * ki * kj)))] = + // = sum_over_components[M * (sum_over_pairs(Aij) + M * sum_over_nodes^2(ki))] = + // = sum_over_components[M * (w_contrib(ci) + M * k_contrib^2(ci))] + // + // where M = 1 / (2m) + // + // Therefore, we could store for each community the following: + // * Weight contribution (w_contrib) + // * Weighted degree contribution (k_contrib) + // + // This allows us to efficiently remove a node from one community and insert + // it into a community of its neighbour without the need to recalculate + // modularity from scratch. + + std::unordered_map<uint32_t, double> w_contrib; + std::unordered_map<uint32_t, double> k_contrib; + + for (uint32_t node_id = 0; node_id < graph->Size(); ++node_id) { + k_contrib[graph->Community(node_id)] += graph->IncidentWeight(node_id); + for (const auto &neigh : graph->Neighbours(node_id)) { + uint32_t nxt_id = neigh.dest; + double w = neigh.weight; + if (graph->Community(node_id) == graph->Community(nxt_id)) + w_contrib[graph->Community(node_id)] += w; + } + } + bool stable = false; + double total_w = graph->TotalWeight(); + while (!stable) { stable = true; for (uint32_t node_id : p) { - std::unordered_map<uint32_t, double> c_contrib; - c_contrib[graph->Community(node_id)] = 0; + std::unordered_map<uint32_t, double> sum_w; + double self_loop = 0; + sum_w[graph->Community(node_id)] = 0; for (const auto &neigh : graph->Neighbours(node_id)) { uint32_t nxt_id = neigh.dest; double weight = neigh.weight; - double contrib = weight - graph->IncidentWeight(node_id) * - graph->IncidentWeight(nxt_id) / (2 * total_w); - c_contrib[graph->Community(nxt_id)] += contrib; + if (nxt_id == node_id) { + self_loop += weight; + continue; + } + sum_w[graph->Community(nxt_id)] += weight; } - auto best_c = std::max_element(c_contrib.begin(), c_contrib.end(), - [](const std::pair<uint32_t, double> &p1, - const std::pair<uint32_t, double> &p2) { - return p1.second < p2.second; - }); + uint32_t my_c = graph->Community(node_id); - if (best_c->second - c_contrib[graph->Community(node_id)] > 0) { - graph->SetCommunity(node_id, best_c->first); + uint32_t best_c = my_c; + double best_dq = 0; + + for (const auto &p : sum_w) { + if (p.first == my_c) continue; + uint32_t nxt_c = p.first; + double dq = 0; + + // contributions before swap (dq = d_after - d_before) + for (uint32_t c : {my_c, nxt_c}) + dq -= w_contrib[c] - k_contrib[c] * k_contrib[c] / (2.0 * total_w); + + // leave the current community + dq += (w_contrib[my_c] - 2.0 * sum_w[my_c] - self_loop) - + (k_contrib[my_c] - graph->IncidentWeight(node_id)) * + (k_contrib[my_c] - graph->IncidentWeight(node_id)) / + (2.0 * total_w); + + // join a new community + dq += (w_contrib[nxt_c] + 2.0 * sum_w[nxt_c] + self_loop) - + (k_contrib[nxt_c] + graph->IncidentWeight(node_id)) * + (k_contrib[nxt_c] + graph->IncidentWeight(node_id)) / + (2.0 * total_w); + + if (dq > best_dq) { + best_dq = dq; + best_c = nxt_c; + } + } + + if (best_c != my_c) { + graph->SetCommunity(node_id, best_c); + w_contrib[my_c] -= 2.0 * sum_w[my_c] + self_loop; + k_contrib[my_c] -= graph->IncidentWeight(node_id); + w_contrib[best_c] += 2.0 * sum_w[best_c] + self_loop; + k_contrib[best_c] += graph->IncidentWeight(node_id); stable = false; } } diff --git a/query_modules/louvain/src/data_structures/graph.cpp b/query_modules/louvain/src/data_structures/graph.cpp index 23508234b..8d3e1d181 100644 --- a/query_modules/louvain/src/data_structures/graph.cpp +++ b/query_modules/louvain/src/data_structures/graph.cpp @@ -82,8 +82,7 @@ double Graph::Modularity() const { std::unordered_map<uint32_t, double> degree_c; for (uint32_t i = 0; i < n_nodes_; ++i) { - degree_c[Community(i)] += - static_cast<double>(IncidentWeight(i)); + degree_c[Community(i)] += IncidentWeight(i); for (const auto &neigh : adj_list_[i]) { uint32_t j = neigh.dest; double w = neigh.weight; diff --git a/query_modules/louvain/src/main.cpp b/query_modules/louvain/src/main.cpp index d1f96efe6..297efe767 100644 --- a/query_modules/louvain/src/main.cpp +++ b/query_modules/louvain/src/main.cpp @@ -22,7 +22,7 @@ int main() { algorithms::Louvain(&graph); for (int i = 0; i < n; ++i) - std::cout << i << graph.Community(i) << "\n"; + std::cout << i << " " << graph.Community(i) << "\n"; std::cout << graph.Modularity() << "\n"; return 0; }