diff --git a/query_modules/louvain/src/algorithms/louvain.cpp b/query_modules/louvain/src/algorithms/louvain.cpp
index f9b416d08..2be08385c 100644
--- a/query_modules/louvain/src/algorithms/louvain.cpp
+++ b/query_modules/louvain/src/algorithms/louvain.cpp
@@ -15,29 +15,102 @@ void OptimizeLocally(comdata::Graph *graph) {
   std::iota(p.begin(), p.end(), 0);
   std::shuffle(p.begin(), p.end(), g);
 
-  double total_w = graph->TotalWeight();
+  // Modularity of a graph can be expressed as:
+  //
+  // Q = 1 / (2m) * sum_over_pairs_of_nodes[(Aij - ki * kj / 2m) * delta(ci, cj)]
+  //
+  // where m is the sum of all weights in the graph,
+  //       Aij is the weight on edge that connects i and j (i=j for a self-loop)
+  //       ki is the sum of weights incident to node i
+  //       ci is the community of node i
+  //       delta(a, b) is the Kronecker delta function.
+  //
+  // With some simple algebraic manipulations, we can transform the formula into:
+  //
+  // Q = sum_over_components[M * ((sum_over_pairs(Aij + M * ki * kj)))] =
+  //   = sum_over_components[M * (sum_over_pairs(Aij) + M * sum_over_nodes^2(ki))] =
+  //   = sum_over_components[M * (w_contrib(ci) + M * k_contrib^2(ci))]
+  //
+  // where M = 1 / (2m)
+  //
+  // Therefore, we could store for each community the following:
+  //    * Weight contribution (w_contrib)
+  //    * Weighted degree contribution (k_contrib)
+  //
+  // This allows us to efficiently remove a node from one community and insert
+  // it into a community of its neighbour without the need to recalculate
+  // modularity from scratch.
+
+  std::unordered_map<uint32_t, double> w_contrib;
+  std::unordered_map<uint32_t, double> k_contrib;
+
+  for (uint32_t node_id = 0; node_id < graph->Size(); ++node_id) {
+    k_contrib[graph->Community(node_id)] += graph->IncidentWeight(node_id);
+    for (const auto &neigh : graph->Neighbours(node_id)) {
+      uint32_t nxt_id = neigh.dest;
+      double w = neigh.weight;
+      if (graph->Community(node_id) == graph->Community(nxt_id))
+        w_contrib[graph->Community(node_id)] += w;
+    }
+  }
+
   bool stable = false;
+  double total_w = graph->TotalWeight();
+
   while (!stable) {
     stable = true;
     for (uint32_t node_id : p) {
-      std::unordered_map<uint32_t, double> c_contrib;
-      c_contrib[graph->Community(node_id)] = 0;
+      std::unordered_map<uint32_t, double> sum_w;
+      double self_loop = 0;
+      sum_w[graph->Community(node_id)] = 0;
       for (const auto &neigh : graph->Neighbours(node_id)) {
         uint32_t nxt_id = neigh.dest;
         double weight = neigh.weight;
-        double contrib = weight - graph->IncidentWeight(node_id) *
-                                  graph->IncidentWeight(nxt_id) / (2 * total_w);
-        c_contrib[graph->Community(nxt_id)] += contrib;
+        if (nxt_id == node_id) {
+          self_loop += weight;
+          continue;
+        }
+        sum_w[graph->Community(nxt_id)] += weight;
       }
 
-      auto best_c = std::max_element(c_contrib.begin(), c_contrib.end(),
-                                     [](const std::pair<uint32_t, double> &p1,
-                                        const std::pair<uint32_t, double> &p2) {
-                                       return p1.second < p2.second;
-                                     });
+      uint32_t my_c = graph->Community(node_id);
 
-      if (best_c->second - c_contrib[graph->Community(node_id)] > 0) {
-        graph->SetCommunity(node_id, best_c->first);
+      uint32_t best_c = my_c;
+      double best_dq = 0;
+
+      for (const auto &p : sum_w) {
+        if (p.first == my_c) continue;
+        uint32_t nxt_c = p.first;
+        double dq = 0;
+
+        // contributions before swap (dq = d_after - d_before)
+        for (uint32_t c : {my_c, nxt_c})
+          dq -= w_contrib[c] - k_contrib[c] * k_contrib[c] / (2.0 * total_w);
+
+        // leave the current community
+        dq += (w_contrib[my_c] - 2.0 * sum_w[my_c] - self_loop) -
+              (k_contrib[my_c] - graph->IncidentWeight(node_id)) *
+                  (k_contrib[my_c] - graph->IncidentWeight(node_id)) /
+                  (2.0 * total_w);
+
+        // join a new community
+        dq += (w_contrib[nxt_c] + 2.0 * sum_w[nxt_c] + self_loop) -
+              (k_contrib[nxt_c] + graph->IncidentWeight(node_id)) *
+                  (k_contrib[nxt_c] + graph->IncidentWeight(node_id)) /
+                  (2.0 * total_w);
+
+        if (dq > best_dq) {
+          best_dq = dq;
+          best_c = nxt_c;
+        }
+      }
+
+      if (best_c != my_c) {
+        graph->SetCommunity(node_id, best_c);
+        w_contrib[my_c] -= 2.0 * sum_w[my_c] + self_loop;
+        k_contrib[my_c] -= graph->IncidentWeight(node_id);
+        w_contrib[best_c] += 2.0 * sum_w[best_c] + self_loop;
+        k_contrib[best_c] += graph->IncidentWeight(node_id);
         stable = false;
       }
     }
diff --git a/query_modules/louvain/src/data_structures/graph.cpp b/query_modules/louvain/src/data_structures/graph.cpp
index 23508234b..8d3e1d181 100644
--- a/query_modules/louvain/src/data_structures/graph.cpp
+++ b/query_modules/louvain/src/data_structures/graph.cpp
@@ -82,8 +82,7 @@ double Graph::Modularity() const {
   std::unordered_map<uint32_t, double> degree_c;
 
   for (uint32_t i = 0; i < n_nodes_; ++i) {
-    degree_c[Community(i)] +=
-        static_cast<double>(IncidentWeight(i));
+    degree_c[Community(i)] += IncidentWeight(i);
     for (const auto &neigh : adj_list_[i]) {
       uint32_t j = neigh.dest;
       double w = neigh.weight;
diff --git a/query_modules/louvain/src/main.cpp b/query_modules/louvain/src/main.cpp
index d1f96efe6..297efe767 100644
--- a/query_modules/louvain/src/main.cpp
+++ b/query_modules/louvain/src/main.cpp
@@ -22,7 +22,7 @@ int main() {
   algorithms::Louvain(&graph);
 
   for (int i = 0; i < n; ++i)
-    std::cout << i << graph.Community(i) << "\n";
+    std::cout << i << " " << graph.Community(i) << "\n";
   std::cout << graph.Modularity() << "\n";
   return 0;
 }