diff --git a/query_modules/CMakeLists.txt b/query_modules/CMakeLists.txt index 72600ea02..0feac4855 100644 --- a/query_modules/CMakeLists.txt +++ b/query_modules/CMakeLists.txt @@ -17,3 +17,5 @@ install(PROGRAMS $ RENAME example.so) # Also install the source of the example, so user can read it. install(FILES example.c DESTINATION lib/memgraph/query_modules) + +add_subdirectory(louvain) diff --git a/query_modules/louvain/CMakeLists.txt b/query_modules/louvain/CMakeLists.txt new file mode 100644 index 000000000..1bc10b401 --- /dev/null +++ b/query_modules/louvain/CMakeLists.txt @@ -0,0 +1,25 @@ +set(MAIN src/main.cpp) +set(MODULE src/louvain_module.cpp) +set(SOURCES src/algorithms/louvain.cpp + src/data_structures/graph.cpp) + +include_directories(src) + +add_library(louvain-core STATIC ${SOURCES}) +set_target_properties(louvain-core PROPERTIES POSITION_INDEPENDENT_CODE ON) +target_link_libraries(louvain-core PUBLIC Threads::Threads glog gflags) + +add_executable(louvain-main ${MAIN}) +target_link_libraries(louvain-main louvain-core) + +enable_testing() +add_subdirectory(test) + +add_library(louvain SHARED ${MODULE}) +target_include_directories(louvain PRIVATE ${CMAKE_SOURCE_DIR}/include) + +if (NOT MG_COMMUNITY) + install(PROGRAMS $ + DESTINATION lib/memgraph/query_modules + RENAME louvain.so) +endif() diff --git a/query_modules/louvain/src/louvain_module.cpp b/query_modules/louvain/src/louvain_module.cpp new file mode 100644 index 000000000..e8718a156 --- /dev/null +++ b/query_modules/louvain/src/louvain_module.cpp @@ -0,0 +1,35 @@ +#include "mg_procedure.h" + +// Placeholder procedure: emits a single record whose "result" field holds a +// greeting string. `args` and `graph` are not used. +static void communities(const mgp_list *args, const mgp_graph *graph, + mgp_result *result, mgp_memory *memory) { + mgp_result_record *record = mgp_result_new_record(result); + if (record == nullptr) { + mgp_result_set_error_msg(result, "Not enough memory!"); + return; + } + mgp_value *hello_world_value = + mgp_value_make_string("Hello from the Louvain module!", memory); + if (hello_world_value == nullptr) { + mgp_result_set_error_msg(result, "Not enough memory!"); + return; + } + // Destroy our value on every path, as the original did after inserting. + const int inserted = + mgp_result_record_insert(record, "result", hello_world_value); + mgp_value_destroy(hello_world_value); + if (!inserted) mgp_result_set_error_msg(result, "Unable to set the result!"); +} + +extern "C" int
mgp_init_module(struct mgp_module *module, + struct mgp_memory *memory) { + struct mgp_proc *proc = + mgp_module_add_read_procedure(module, "communities", communities); + if (!proc) return 1; // no procedure handle to describe + return mgp_proc_add_result(proc, "result", mgp_type_string()) ? 0 : 1; +} + +extern "C" int mgp_shutdown_module() { + return 0; +} diff --git a/query_modules/louvain/src/main.cpp b/query_modules/louvain/src/main.cpp new file mode 100644 index 000000000..86d5d2e11 --- /dev/null +++ b/query_modules/louvain/src/main.cpp @@ -0,0 +1,31 @@ +#include <iostream> + +#include "algorithms/algorithms.hpp" +#include "data_structures/graph.hpp" + +// A simple program that reads the graph from STDIN and +// outputs the detected communities from louvain along with +// its modularity measure on STDOUT. Expected input: node and edge counts, +// then one "a b weight" triple per edge. Exits with 1 on malformed input. +int main() { + int n = 0; + int m = 0; + if (!(std::cin >> n >> m) || n < 0 || m < 0) return 1; + comdata::Graph G(n); + for (int i = 0; i < m; ++i) { + int a = 0; + int b = 0; + double c = 0; + // Stop instead of feeding stale or partial values to AddEdge. + if (!(std::cin >> a >> b >> c)) return 1; + G.AddEdge(a, b, c); + } + + algorithms::Louvain(&G); + + // One "<node> <community>" pair per line, then the modularity. + for (int i = 0; i < n; ++i) + std::cout << i << " " << G.Community(i) << "\n"; + std::cout << G.Modularity() << "\n"; + return 0; +} diff --git a/query_modules/louvain/test/.clang-tidy b/query_modules/louvain/test/.clang-tidy new file mode 100644 index 000000000..559bdb744 --- /dev/null +++ b/query_modules/louvain/test/.clang-tidy @@ -0,0 +1,80 @@ +--- +Checks: '*, + -android-*, + -cert-err58-cpp, + -cppcoreguidelines-avoid-c-arrays, + -cppcoreguidelines-avoid-goto, + -cppcoreguidelines-avoid-magic-numbers, + -cppcoreguidelines-macro-usage, + -cppcoreguidelines-no-malloc, + -cppcoreguidelines-non-private-member-variables-in-classes, + -cppcoreguidelines-owning-memory, + -cppcoreguidelines-pro-bounds-array-to-pointer-decay, + -cppcoreguidelines-pro-bounds-constant-array-index, + -cppcoreguidelines-pro-bounds-pointer-arithmetic, + -cppcoreguidelines-pro-type-member-init, + -cppcoreguidelines-pro-type-reinterpret-cast, + -cppcoreguidelines-pro-type-static-cast-downcast, +
-cppcoreguidelines-pro-type-union-access, + -cppcoreguidelines-pro-type-vararg, + -cppcoreguidelines-special-member-functions, + -fuchsia-default-arguments, + -fuchsia-default-arguments-calls, + -fuchsia-default-arguments-declarations, + -fuchsia-overloaded-operator, + -fuchsia-statically-constructed-objects, + -fuchsia-trailing-return, + -fuchsia-virtual-inheritance, + -google-explicit-constructor, + -google-readability-*, + -hicpp-avoid-c-arrays, + -hicpp-avoid-goto, + -hicpp-braces-around-statements, + -hicpp-member-init, + -hicpp-no-array-decay, + -hicpp-no-assembler, + -hicpp-no-malloc, + -hicpp-special-member-functions, + -hicpp-use-equals-default, + -hicpp-vararg, + -llvm-header-guard, + -misc-non-private-member-variables-in-classes, + -misc-unused-parameters, + -modernize-avoid-c-arrays, + -modernize-concat-nested-namespaces, + -modernize-pass-by-value, + -modernize-use-equals-default, + -modernize-use-nodiscard, + -modernize-use-trailing-return-type, + -performance-unnecessary-value-param, + -readability-braces-around-statements, + -readability-else-after-return, + -readability-implicit-bool-conversion, + -readability-magic-numbers, + -readability-named-parameter' +WarningsAsErrors: '' +HeaderFilterRegex: '' +AnalyzeTemporaryDtors: false +FormatStyle: none +CheckOptions: + - key: google-readability-braces-around-statements.ShortStatementLines + value: '1' + - key: google-readability-function-size.StatementThreshold + value: '800' + - key: google-readability-namespace-comments.ShortNamespaceLines + value: '10' + - key: google-readability-namespace-comments.SpacesBeforeComments + value: '2' + - key: modernize-loop-convert.MaxCopySize + value: '16' + - key: modernize-loop-convert.MinConfidence + value: reasonable + - key: modernize-loop-convert.NamingStyle + value: CamelCase + - key: modernize-pass-by-value.IncludeStyle + value: llvm + - key: modernize-replace-auto-ptr.IncludeStyle + value: llvm + - key: modernize-use-nullptr.NullMacros + value: 'NULL' +... 
diff --git a/query_modules/louvain/test/CMakeLists.txt b/query_modules/louvain/test/CMakeLists.txt new file mode 100644 index 000000000..eadc4f19a --- /dev/null +++ b/query_modules/louvain/test/CMakeLists.txt @@ -0,0 +1,3 @@ +include_directories(${GTEST_INCLUDE_DIR}) + +add_subdirectory(unit) diff --git a/query_modules/louvain/test/unit/CMakeLists.txt b/query_modules/louvain/test/unit/CMakeLists.txt new file mode 100644 index 000000000..ac879e992 --- /dev/null +++ b/query_modules/louvain/test/unit/CMakeLists.txt @@ -0,0 +1,28 @@ +set(test_prefix louvain__unit__) + +add_custom_target(louvain__unit) + +add_library(louvain-test STATIC utils.cpp) +set_target_properties(louvain-test PROPERTIES POSITION_INDEPENDENT_CODE ON) + +function(add_unit_test test_cpp) + # get exec name (remove extension from the abs path) + get_filename_component(exec_name ${test_cpp} NAME_WE) + set(target_name ${test_prefix}${exec_name}) + add_executable(${target_name} ${test_cpp}) + # OUTPUT_NAME sets the real name of a target when it is built and can be + # used to help create two targets of the same name even though CMake + # requires unique logical target names + set_target_properties(${target_name} PROPERTIES OUTPUT_NAME ${exec_name}) + # TODO: this is a temporary workaround for the test build warnings + target_compile_options(${target_name} PRIVATE -Wno-comment -Wno-sign-compare + -Wno-unused-variable) + target_link_libraries(${target_name} glog gflags gtest gtest_main Threads::Threads + louvain-core louvain-test) + # register test + add_test(${target_name} ${exec_name}) + # add to unit target + add_dependencies(louvain__unit ${target_name}) +endfunction(add_unit_test) + +add_unit_test(graph.cpp) diff --git a/query_modules/louvain/test/unit/graph.cpp b/query_modules/louvain/test/unit/graph.cpp new file mode 100644 index 000000000..8afcbff5a --- /dev/null +++ b/query_modules/louvain/test/unit/graph.cpp @@ -0,0 +1,323 @@ +#include +#include + +#include "data_structures/graph.hpp" +#include
"utils.hpp" + +// Checks if communities of nodes in G correspond to a given community vector. +bool community_check(const comdata::Graph &G, const std::vector &c) { + if (G.Size() != c.size()) return false; + for (uint32_t node_id = 0; node_id < G.Size(); ++node_id) + if (G.Community(node_id) != c[node_id]) + return false; + return true; +} + +// Checks if degrees of nodes in G correspond to a given degree vector. +bool degree_check(const comdata::Graph &G, const std::vector &deg) { + if (G.Size() != deg.size()) return false; + for (uint32_t node_id = 0; node_id < G.Size(); ++node_id) + if (G.Degree(node_id) != deg[node_id]) + return false; + return true; +} + +// Checks if incident weights of nodes in G correspond to a given weight vector. +bool inc_w_check(const comdata::Graph &G, const std::vector &inc_w) { + if (G.Size() != inc_w.size()) return false; + for (uint32_t node_id = 0; node_id < G.Size(); ++node_id) + if (std::abs(G.IncidentWeight(node_id) - inc_w[node_id]) > 1e-6) + return false; + return true; +} + +// Sets communities of nodes in G. Returns true on success.
+bool set_communities(comdata::Graph *G, const std::vector &c) { + if (G->Size() != c.size()) return false; + for (uint32_t node_id = 0; node_id < G->Size(); ++node_id) + G->SetCommunity(node_id, c[node_id]); + return true; +} + +TEST(Graph, Constructor) { + uint32_t nodes = 100; + comdata::Graph G(nodes); + ASSERT_EQ(G.Size(), nodes); + for (uint32_t node_id = 0; node_id < nodes; ++node_id) { + ASSERT_EQ(G.IncidentWeight(node_id), 0); + ASSERT_EQ(G.Community(node_id), node_id); + } +} + +TEST(Graph, Size) { + comdata::Graph G1 = GenRandomUnweightedGraph(0, 0); + comdata::Graph G2 = GenRandomUnweightedGraph(42, 41); + comdata::Graph G3 = GenRandomUnweightedGraph(100, 250); + ASSERT_EQ(G1.Size(), 0); + ASSERT_EQ(G2.Size(), 42); + ASSERT_EQ(G3.Size(), 100); +} + +TEST(Graph, Communities) { + comdata::Graph G = GenRandomUnweightedGraph(100, 250); + + for (int i = 0; i < 100; ++i) G.SetCommunity(i, i % 5); + for (int i = 0; i < 100; ++i) ASSERT_EQ(G.Community(i), i % 5); + + // Try to set communities on non-existing nodes + ASSERT_DEATH({ G.SetCommunity(100, 2); }, ""); + ASSERT_DEATH({ G.SetCommunity(150, 0); }, ""); + + // Try to get the community of a non-existing node + ASSERT_DEATH({ G.Community(100); }, ""); + ASSERT_DEATH({ G.Community(150); }, ""); +} + +TEST(Graph, CommunityNormalization) { + // Communities are already normalized. + comdata::Graph G = GenRandomUnweightedGraph(5, 10); + std::vector init_c = {0, 2, 1, 3, 4}; + std::vector final_c = {0, 2, 1, 3, 4}; + ASSERT_TRUE(set_communities(&G, init_c)); + G.NormalizeCommunities(); + ASSERT_TRUE(community_check(G, final_c)); + + // Each node in its own community.
+ G = GenRandomUnweightedGraph(5, 10); + init_c = {20, 30, 10, 40, 50}; + final_c = {1, 2, 0, 3, 4}; + ASSERT_TRUE(set_communities(&G, init_c)); + G.NormalizeCommunities(); + ASSERT_TRUE(community_check(G, final_c)); + + // Multiple nodes in the same community; expected ids follow sorted order + G = GenRandomUnweightedGraph(7, 10); + init_c = {13, 99, 13, 13, 1, 99, 1}; + final_c = {1, 2, 1, 1, 0, 2, 0}; + ASSERT_TRUE(set_communities(&G, init_c)); + G.NormalizeCommunities(); + ASSERT_TRUE(community_check(G, final_c)); +} + +TEST(Graph, AddEdge) { + comdata::Graph G = GenRandomUnweightedGraph(5, 0); + + // Node out of bounds (the graph has 5 nodes, so id 5 does not exist). + ASSERT_DEATH({ G.AddEdge(1, 5, 7); }, ""); + + // Repeated edge, even with a different weight + G.AddEdge(1, 2, 1); + ASSERT_DEATH({ G.AddEdge(1, 2, 7); }, ""); + + // Non-positive edge weight + ASSERT_DEATH({ G.AddEdge(2, 3, -7); }, ""); + ASSERT_DEATH({ G.AddEdge(3, 4, 0); }, ""); +} + +TEST(Graph, Degrees) { + // Graph without edges + comdata::Graph G = GenRandomUnweightedGraph(5, 0); + std::vector deg = {0, 0, 0, 0, 0}; + ASSERT_TRUE(degree_check(G, deg)); + + // Chain + // (0)--(1)--(2)--(3)--(4) + G = BuildGraph(5, {{0, 1, 1}, + {1, 2, 1}, + {2, 3, 1}, + {3, 4, 1}}); + deg = {1, 2, 2, 2, 1}; + ASSERT_TRUE(degree_check(G, deg)); + + // Tree + // (0)--(3) + // / \ + // (1) (2) + // | / \ + // (4) (5) (6) + G = BuildGraph(7, {{0, 1, 1}, + {0, 2, 1}, + {0, 3, 1}, + {1, 4, 1}, + {2, 5, 1}, + {2, 6, 1}}); + deg = {3, 2, 3, 1, 1, 1, 1}; + ASSERT_TRUE(degree_check(G, deg)); + + // Graph without self-loops + // (0)--(1) + // | \ | \ + // | \ | \ + // (2)--(3)-(4) + G = BuildGraph(5, {{0, 1, 1}, + {0, 2, 1}, + {0, 3, 1}, + {1, 3, 1}, + {1, 4, 1}, + {2, 3, 1}, + {3, 4, 1}}); + deg = {3, 3, 2, 4, 2}; + ASSERT_TRUE(degree_check(G, deg)); + + // Self loops add 1 to the expected degree [*nodes have self loops] + // (0)--(1*) + // | \ | \ + // | \ | \ + // (2*)--(3)-(4*) + G = BuildGraph(5, {{0, 1, 1}, + {0, 2, 1}, + {0, 3, 1}, + {1, 3, 1}, + {1, 4, 1}, + {2, 3, 1}, + {3, 4, 1}, + {1, 1, 1}, + {2, 2, 2}, + {4, 4, 4}}); + deg = {3, 4,
3, 4, 3}; + ASSERT_TRUE(degree_check(G, deg)); + + // Try to get degree of non-existing nodes + ASSERT_DEATH({ G.Degree(5); }, ""); + ASSERT_DEATH({ G.Degree(100); }, ""); +} + +TEST(Graph, Weights) { + // Graph without edges: every incident weight and the total are zero + comdata::Graph G = GenRandomUnweightedGraph(5, 0); + std::vector inc_w = {0, 0, 0, 0, 0}; + ASSERT_TRUE(inc_w_check(G, inc_w)); + ASSERT_EQ(G.TotalWeight(), 0); + + // Chain + // (0)--(1)--(2)--(3)--(4) + G = BuildGraph(5, {{0, 1, 0.1}, + {1, 2, 0.5}, + {2, 3, 2.3}, + {3, 4, 4.2}}); + inc_w = {0.1, 0.6, 2.8, 6.5, 4.2}; + ASSERT_TRUE(inc_w_check(G, inc_w)); + ASSERT_NEAR(G.TotalWeight(), 7.1, 1e-6); + + // Tree + // (0)--(3) + // / \ + // (1) (2) + // | / \ + // (4) (5) (6) + G = BuildGraph(7, {{0, 1, 1.3}, + {0, 2, 0.2}, + {0, 3, 1}, + {1, 4, 3.2}, + {2, 5, 4.2}, + {2, 6, 0.7}}); + inc_w = {2.5, 4.5, 5.1, 1, 3.2, 4.2, 0.7}; + ASSERT_TRUE(inc_w_check(G, inc_w)); + EXPECT_NEAR(G.TotalWeight(), 10.6, 1e-6); + + // Graph without self-loops + // (0)--(1) + // | \ | \ + // | \ | \ + // (2)--(3)-(4) + G = BuildGraph(5, {{0, 1, 0.1}, + {0, 2, 0.2}, + {0, 3, 0.3}, + {1, 3, 0.4}, + {1, 4, 0.5}, + {2, 3, 0.6}, + {3, 4, 0.7}}); + inc_w = {0.6, 1, 0.8, 2, 1.2}; + ASSERT_TRUE(inc_w_check(G, inc_w)); + EXPECT_NEAR(G.TotalWeight(), 2.8, 1e-6); + + // Self-loop weights are expected to count once [*nodes have self loops] + // (0)--(1*) + // | \ | \ + // | \ | \ + // (2*)--(3)-(4*) + G = BuildGraph(5, {{0, 1, 0.1}, + {0, 2, 0.2}, + {0, 3, 0.3}, + {1, 3, 0.4}, + {1, 4, 0.5}, + {2, 3, 0.6}, + {3, 4, 0.7}, + {1, 1, 0.8}, + {2, 2, 0.9}, + {4, 4, 1}}); + inc_w = {0.6, 1.8, 1.7, 2, 2.2}; + ASSERT_TRUE(inc_w_check(G, inc_w)); + EXPECT_NEAR(G.TotalWeight(), 5.5, 1e-6); + + // Try to get incident weight of non-existing node + ASSERT_DEATH({ G.IncidentWeight(5); }, ""); + ASSERT_DEATH({ G.IncidentWeight(100); }, ""); +} + +TEST(Graph, Modularity) { + // Graph without edges + comdata::Graph G = GenRandomUnweightedGraph(5, 0); + ASSERT_EQ(G.Modularity(), 0); + + // Chain + //
(0)--(1)--(2)--(3)--(4) + G = BuildGraph(5, {{0, 1, 0.1}, + {1, 2, 0.5}, + {2, 3, 2.3}, + {3, 4, 4.2}}); + std::vector c = {0, 1, 1, 2, 2}; + ASSERT_TRUE(set_communities(&G, c)); + EXPECT_NEAR(G.Modularity(), 0.37452886332076973, 1e-6); + + // Tree + // (0)--(3) + // / \ + // (1) (2) + // | / \ + // (4) (5) (6) + G = BuildGraph(7, {{0, 1, 1.3}, + {0, 2, 0.2}, + {0, 3, 1}, + {1, 4, 3.2}, + {2, 5, 4.2}, + {2, 6, 0.7}}); + c = {0, 0, 1, 0, 0, 1, 2}; + ASSERT_TRUE(set_communities(&G, c)); + EXPECT_NEAR(G.Modularity(), 0.6945087219651122, 1e-6); + + // Graph without self-loops + // (0)--(1) + // | \ | \ + // | \ | \ + // (2)--(3)-(4) + G = BuildGraph(5, {{0, 1, 0.1}, + {0, 2, 0.2}, + {0, 3, 0.3}, + {1, 3, 0.4}, + {1, 4, 0.5}, + {2, 3, 0.6}, + {3, 4, 0.7}}); + c = {0, 1, 1, 1, 1}; + ASSERT_TRUE(set_communities(&G, c)); + EXPECT_NEAR(G.Modularity(), 0.32653061224489793, 1e-6); + + // Graph with self loop [*nodes have self loops] + // (0)--(1*) + // | \ | \ + // | \ | \ + // (2*)--(3)-(4*) + G = BuildGraph(5, {{0, 1, 0.1}, + {0, 2, 0.2}, + {0, 3, 0.3}, + {1, 3, 0.4}, + {1, 4, 0.5}, + {2, 3, 0.6}, + {3, 4, 0.7}, + {1, 1, 0.8}, + {2, 2, 0.9}, + {4, 4, 1}}); + c = {0, 0, 0, 0, 1}; + ASSERT_TRUE(set_communities(&G, c)); + EXPECT_NEAR(G.Modularity(), 0.2754545454545455, 1e-6); +} diff --git a/query_modules/louvain/test/unit/utils.cpp b/query_modules/louvain/test/unit/utils.cpp new file mode 100644 index 000000000..681d7e1e8 --- /dev/null +++ b/query_modules/louvain/test/unit/utils.cpp @@ -0,0 +1,42 @@ +#include "utils.hpp" + +#include +#include <stdexcept> + +comdata::Graph BuildGraph( + uint32_t nodes, std::vector> edges) { + comdata::Graph G(nodes); + for (auto &edge : edges) + G.AddEdge(std::get<0>(edge), std::get<1>(edge), std::get<2>(edge)); + return G; +} + +comdata::Graph GenRandomUnweightedGraph(uint32_t nodes, uint32_t edges) { + // Only nodes * (nodes - 1) / 2 distinct non-loop edges exist; asking for + // more would keep the rejection loop below spinning forever. + const uint64_t max_edges = + nodes < 2 ? 0 : static_cast<uint64_t>(nodes) * (nodes - 1) / 2; + if (edges > max_edges) + throw std::invalid_argument("too many edges for this many nodes"); + // Nothing to sample, so skip building a distribution over [0, nodes - 1] + // (nodes - 1 wraps around when nodes == 0). + if (edges == 0) return BuildGraph(nodes, {}); + auto seed = + std::chrono::high_resolution_clock::now().time_since_epoch().count(); + std::mt19937 rng(seed); + std::uniform_int_distribution dist(0, nodes - 1); + std::set> E; + for (uint32_t i = 0; i < edges; ++i)
{ + int u; + int v; + do { + u = dist(rng); + v = dist(rng); + if (u > v) std::swap(u, v); + } while (u == v || E.find({u, v, 1}) != E.end()); + E.insert({u, v, 1}); + } + return BuildGraph(nodes, std::vector>( + E.begin(), E.end())); +} + diff --git a/query_modules/louvain/test/unit/utils.hpp b/query_modules/louvain/test/unit/utils.hpp new file mode 100644 index 000000000..2fbb16123 --- /dev/null +++ b/query_modules/louvain/test/unit/utils.hpp @@ -0,0 +1,33 @@ +#pragma once + +#include +#include +#include +#include + +#include "data_structures/graph.hpp" + +/// Measures time elapsed since construction. This class is threadsafe +class Timer { + public: + Timer() : start_time_(std::chrono::steady_clock::now()) {} + + template > + TDuration Elapsed() const { + return std::chrono::duration_cast( + std::chrono::steady_clock::now() - start_time_); + } + + private: + std::chrono::steady_clock::time_point start_time_; +}; + +/// Builds the graph from a given number of nodes and a list of edges. +/// Nodes should be 0-indexed and each edge should be provided only once. +comdata::Graph BuildGraph( + uint32_t nodes, std::vector> edges); + +/// Generates a random undirected graph without self-loops or repeated edges; +/// `edges` must not exceed nodes * (nodes - 1) / 2. The graph is not picked out +/// of a uniform distribution. All weights are the same and equal to one. +comdata::Graph GenRandomUnweightedGraph(uint32_t nodes, uint32_t edges);