Add cmake files and a dummy louvain query module

Reviewers: teon.banek

Reviewed By: teon.banek

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D2561
This commit is contained in:
Ivan Paljak 2019-11-25 15:51:51 +01:00
parent fc146a71e0
commit 7467d52d5b
10 changed files with 576 additions and 0 deletions

View File

@ -17,3 +17,5 @@ install(PROGRAMS $<TARGET_FILE:example>
RENAME example.so)
# Ship the example's source next to the compiled module so users can read it.
install(
  FILES example.c
  DESTINATION lib/memgraph/query_modules)

# Louvain community detection query module.
add_subdirectory(louvain)

View File

@ -0,0 +1,25 @@
# Build configuration for the Louvain query module.
#
# Targets defined here:
#   louvain-core  static library with the algorithm and graph data structure
#   louvain-main  command line driver built from src/main.cpp
#   louvain       shared library built from src/louvain_module.cpp

# Kept directory-scoped on purpose: targets declared under test/ (added below)
# resolve headers such as "data_structures/graph.hpp" through this include
# path.
include_directories(src)

add_library(louvain-core STATIC
  src/algorithms/louvain.cpp
  src/data_structures/graph.cpp)
# The static library is built as position independent code.
set_target_properties(louvain-core PROPERTIES POSITION_INDEPENDENT_CODE ON)
target_link_libraries(louvain-core PUBLIC Threads::Threads glog gflags)

add_executable(louvain-main src/main.cpp)
# Always state the link visibility; the keyword-less signature has legacy
# semantics and cannot be mixed with the keyword form on the same target.
target_link_libraries(louvain-main PRIVATE louvain-core)

enable_testing()
add_subdirectory(test)

add_library(louvain SHARED src/louvain_module.cpp)
# The module source includes "mg_procedure.h".
# NOTE(review): presumably that header lives in the top-level include/
# directory -- confirm.
target_include_directories(louvain PRIVATE ${CMAKE_SOURCE_DIR}/include)

# The module is only installed when MG_COMMUNITY is not set.
if (NOT MG_COMMUNITY)
  install(
    PROGRAMS $<TARGET_FILE:louvain>
    DESTINATION lib/memgraph/query_modules
    RENAME louvain.so)
endif()

View File

@ -0,0 +1,22 @@
#include "mg_procedure.h"
static void communities(const mgp_list *args, const mgp_graph *graph,
mgp_result *result, mgp_memory *memory) {
mgp_result_record *record = mgp_result_new_record(result);
mgp_value *hello_world_value =
mgp_value_make_string("Louvain, fuck yeah!", memory);
mgp_result_record_insert(record, "result", hello_world_value);
mgp_value_destroy(hello_world_value);
}
// Module entry point with C linkage. Registers the read procedure
// `communities` and declares its single result field "result" of string type.
//
// Returns 0 on success and 1 when the procedure or its result field could not
// be registered.
extern "C" int mgp_init_module(struct mgp_module *module,
                               struct mgp_memory *memory) {
  struct mgp_proc *proc =
      mgp_module_add_read_procedure(module, "communities", communities);
  // Do not hand a null procedure to mgp_proc_add_result.
  if (!proc) return 1;
  if (!mgp_proc_add_result(proc, "result", mgp_type_string())) return 1;
  return 0;
}
// Module teardown hook with C linkage. There is nothing to release, so it
// always returns 0.
// NOTE(review): presumably invoked when the module is unloaded -- confirm.
extern "C" int mgp_shutdown_module() {
  const int status_ok = 0;
  return status_ok;
}

View File

@ -0,0 +1,28 @@
#include <iostream>
#include "algorithms/algorithms.hpp"
#include "data_structures/graph.hpp"
// A simple program that reads a graph from STDIN, runs louvain on it and
// writes the detected communities followed by the modularity to STDOUT.
//
// Input:  two integers `n m`, followed by `m` triples `a b c`, each passed to
//         Graph::AddEdge(a, b, c).
// Output: `n` lines of the form "<node> <community>", then one line holding
//         the modularity.
// Returns 0 on success and 1 when the input is malformed.
int main() {
  int n;
  int m;
  if (!(std::cin >> n >> m) || n < 0 || m < 0) {
    std::cerr << "Expected two non-negative integers: node and edge count\n";
    return 1;
  }
  comdata::Graph G(n);
  for (int i = 0; i < m; ++i) {
    int a;
    int b;
    double c;
    if (!(std::cin >> a >> b >> c)) {
      std::cerr << "Failed to read edge " << i << "\n";
      return 1;
    }
    G.AddEdge(a, b, c);
  }
  algorithms::Louvain(&G);
  // Separate the node id from its community; without the space the two
  // numbers run together and the output is ambiguous.
  for (int i = 0; i < n; ++i)
    std::cout << i << " " << G.Community(i) << "\n";
  std::cout << G.Modularity() << "\n";
  return 0;
}

View File

@ -0,0 +1,80 @@
---
# clang-tidy configuration for the louvain module.
# Checks: start from every available check ('*') and subtract the individual
# checks and check groups listed with a leading '-'.
Checks: '*,
-android-*,
-cert-err58-cpp,
-cppcoreguidelines-avoid-c-arrays,
-cppcoreguidelines-avoid-goto,
-cppcoreguidelines-avoid-magic-numbers,
-cppcoreguidelines-macro-usage,
-cppcoreguidelines-no-malloc,
-cppcoreguidelines-non-private-member-variables-in-classes,
-cppcoreguidelines-owning-memory,
-cppcoreguidelines-pro-bounds-array-to-pointer-decay,
-cppcoreguidelines-pro-bounds-constant-array-index,
-cppcoreguidelines-pro-bounds-pointer-arithmetic,
-cppcoreguidelines-pro-type-member-init,
-cppcoreguidelines-pro-type-reinterpret-cast,
-cppcoreguidelines-pro-type-static-cast-downcast,
-cppcoreguidelines-pro-type-union-access,
-cppcoreguidelines-pro-type-vararg,
-cppcoreguidelines-special-member-functions,
-fuchsia-default-arguments,
-fuchsia-default-arguments-calls,
-fuchsia-default-arguments-declarations,
-fuchsia-overloaded-operator,
-fuchsia-statically-constructed-objects,
-fuchsia-trailing-return,
-fuchsia-virtual-inheritance,
-google-explicit-constructor,
-google-readability-*,
-hicpp-avoid-c-arrays,
-hicpp-avoid-goto,
-hicpp-braces-around-statements,
-hicpp-member-init,
-hicpp-no-array-decay,
-hicpp-no-assembler,
-hicpp-no-malloc,
-hicpp-special-member-functions,
-hicpp-use-equals-default,
-hicpp-vararg,
-llvm-header-guard,
-misc-non-private-member-variables-in-classes,
-misc-unused-parameters,
-modernize-avoid-c-arrays,
-modernize-concat-nested-namespaces,
-modernize-pass-by-value,
-modernize-use-equals-default,
-modernize-use-nodiscard,
-modernize-use-trailing-return-type,
-performance-unnecessary-value-param,
-readability-braces-around-statements,
-readability-else-after-return,
-readability-implicit-bool-conversion,
-readability-magic-numbers,
-readability-named-parameter'
# Empty: no check is promoted from warning to error.
WarningsAsErrors: ''
# Empty header filter.
# NOTE(review): with an empty filter, diagnostics located in headers are
# presumably not reported -- confirm this is intended.
HeaderFilterRegex: ''
AnalyzeTemporaryDtors: false
FormatStyle: none
# Per-check tuning.
# NOTE(review): the google-readability-* and modernize-pass-by-value options
# below configure checks that the Checks list above disables.
CheckOptions:
- key: google-readability-braces-around-statements.ShortStatementLines
value: '1'
- key: google-readability-function-size.StatementThreshold
value: '800'
- key: google-readability-namespace-comments.ShortNamespaceLines
value: '10'
- key: google-readability-namespace-comments.SpacesBeforeComments
value: '2'
- key: modernize-loop-convert.MaxCopySize
value: '16'
- key: modernize-loop-convert.MinConfidence
value: reasonable
- key: modernize-loop-convert.NamingStyle
value: CamelCase
- key: modernize-pass-by-value.IncludeStyle
value: llvm
- key: modernize-replace-auto-ptr.IncludeStyle
value: llvm
- key: modernize-use-nullptr.NullMacros
value: 'NULL'
...

View File

@ -0,0 +1,3 @@
# Add the Google Test include directory for every target declared in this
# directory and below.
# NOTE(review): GTEST_INCLUDE_DIR is not set in this file; it is presumably
# inherited from a parent scope -- confirm.
include_directories(${GTEST_INCLUDE_DIR})
# The unit tests live in their own subdirectory.
add_subdirectory(unit)

View File

@ -0,0 +1,28 @@
# Prefix prepended to every unit test's logical target name.
set(test_prefix louvain__unit__)

# Umbrella target: building `louvain__unit` builds every registered unit test.
add_custom_target(louvain__unit)

# Helpers shared by the unit tests.
add_library(louvain-test STATIC utils.cpp)
set_target_properties(louvain-test PROPERTIES POSITION_INDEPENDENT_CODE ON)
# utils.cpp builds comdata::Graph objects, so state the dependency on the core
# library explicitly instead of relying on the order in which consumers happen
# to list the two static libraries.
target_link_libraries(louvain-test PUBLIC louvain-core)

# add_unit_test(<test_cpp>)
#
# Builds <test_cpp> into the target `${test_prefix}<name>` (where <name> is the
# file name without its extension), registers it with CTest under the target
# name and attaches it to the `louvain__unit` umbrella target.
#
# Example: add_unit_test(graph.cpp)
function(add_unit_test test_cpp)
  # get exec name (remove extension from the abs path)
  get_filename_component(exec_name ${test_cpp} NAME_WE)
  set(target_name ${test_prefix}${exec_name})
  add_executable(${target_name} ${test_cpp})
  # OUTPUT_NAME sets the real name of a target when it is built and can be
  # used to help create two targets of the same name even though CMake
  # requires unique logical target names
  set_target_properties(${target_name} PROPERTIES OUTPUT_NAME ${exec_name})
  # TODO: this is a temporary workaround for the test build warnings
  target_compile_options(${target_name} PRIVATE
    -Wno-comment
    -Wno-sign-compare
    -Wno-unused-variable)
  target_link_libraries(${target_name} PRIVATE
    glog
    gflags
    gtest
    gtest_main
    Threads::Threads
    louvain-core
    louvain-test)
  # Register the test. Naming the target in COMMAND lets CMake substitute the
  # path of the built binary instead of depending on an executable called
  # `${exec_name}` being found from the test's working directory.
  add_test(NAME ${target_name} COMMAND ${target_name})
  # add to unit target
  add_dependencies(louvain__unit ${target_name})
endfunction()

add_unit_test(graph.cpp)

View File

@ -0,0 +1,323 @@
#include <glog/logging.h>
#include <gtest/gtest.h>
#include "data_structures/graph.hpp"
#include "utils.hpp"
// Returns true when `c` has one entry per node of G and every node's
// community equals the entry stored at the node's index.
bool community_check(const comdata::Graph &G, const std::vector<uint32_t> &c) {
  bool matches = G.Size() == c.size();
  for (uint32_t v = 0; matches && v < G.Size(); ++v) {
    matches = G.Community(v) == c[v];
  }
  return matches;
}
// Returns true when `deg` has one entry per node of G and every node's degree
// equals the entry stored at the node's index.
bool degree_check(const comdata::Graph &G, const std::vector<uint32_t> &deg) {
  bool matches = G.Size() == deg.size();
  for (uint32_t v = 0; matches && v < G.Size(); ++v) {
    matches = G.Degree(v) == deg[v];
  }
  return matches;
}
// Returns true when `inc_w` has one entry per node of G and every node's
// incident weight is within 1e-6 of the entry stored at the node's index.
bool inc_w_check(const comdata::Graph &G, const std::vector<double> &inc_w) {
  if (G.Size() != inc_w.size()) return false;
  uint32_t v = 0;
  while (v < G.Size()) {
    const double diff = G.IncidentWeight(v) - inc_w[v];
    // Same as comparing the absolute difference against the tolerance.
    if (diff > 1e-6 || diff < -1e-6) return false;
    ++v;
  }
  return true;
}
// Assigns community c[i] to node i of G. Returns false, leaving G untouched,
// when `c` does not have exactly one entry per node; returns true otherwise.
bool set_communities(comdata::Graph *G, const std::vector<uint32_t> &c) {
  if (G->Size() == c.size()) {
    for (uint32_t v = 0; v < G->Size(); ++v) {
      G->SetCommunity(v, c[v]);
    }
    return true;
  }
  return false;
}
// A freshly constructed graph reports the requested size, and every node has
// zero incident weight and a community equal to its own id.
TEST(Graph, Constructor) {
  uint32_t node_count = 100;
  comdata::Graph graph(node_count);
  ASSERT_EQ(graph.Size(), node_count);
  uint32_t v = 0;
  while (v < node_count) {
    ASSERT_EQ(graph.IncidentWeight(v), 0);
    ASSERT_EQ(graph.Community(v), v);
    ++v;
  }
}
// Size() reports the number of nodes the graph was generated with.
TEST(Graph, Size) {
  comdata::Graph empty_graph = GenRandomUnweightedGraph(0, 0);
  comdata::Graph medium_graph = GenRandomUnweightedGraph(42, 41);
  comdata::Graph large_graph = GenRandomUnweightedGraph(100, 250);

  ASSERT_EQ(empty_graph.Size(), 0);
  ASSERT_EQ(medium_graph.Size(), 42);
  ASSERT_EQ(large_graph.Size(), 100);
}
// Communities can be set and read back for existing nodes; accessing a node
// outside the graph is expected to terminate the process.
TEST(Graph, Communities) {
  comdata::Graph G = GenRandomUnweightedGraph(100, 250);

  int i = 0;
  while (i < 100) {
    G.SetCommunity(i, i % 5);
    ++i;
  }
  i = 0;
  while (i < 100) {
    ASSERT_EQ(G.Community(i), i % 5);
    ++i;
  }

  // Setting the community of a non-existing node.
  ASSERT_DEATH({ G.SetCommunity(100, 2); }, "");
  ASSERT_DEATH({ G.SetCommunity(150, 0); }, "");

  // Reading the community of a non-existing node.
  ASSERT_DEATH({ G.Community(100); }, "");
  ASSERT_DEATH({ G.Community(150); }, "");
}
// NormalizeCommunities() is checked on three assignments; each case below sets
// the communities, normalizes and compares against the expected labels.
TEST(Graph, CommunityNormalization) {
  {
    // Communities are already normalized.
    comdata::Graph G = GenRandomUnweightedGraph(5, 10);
    std::vector<uint32_t> before = {0, 2, 1, 3, 4};
    std::vector<uint32_t> after = {0, 2, 1, 3, 4};
    ASSERT_TRUE(set_communities(&G, before));
    G.NormalizeCommunities();
    ASSERT_TRUE(community_check(G, after));
  }
  {
    // Each node in its own community.
    comdata::Graph G = GenRandomUnweightedGraph(5, 10);
    std::vector<uint32_t> before = {20, 30, 10, 40, 50};
    std::vector<uint32_t> after = {1, 2, 0, 3, 4};
    ASSERT_TRUE(set_communities(&G, before));
    G.NormalizeCommunities();
    ASSERT_TRUE(community_check(G, after));
  }
  {
    // Multiple nodes in the same community.
    comdata::Graph G = GenRandomUnweightedGraph(7, 10);
    std::vector<uint32_t> before = {13, 99, 13, 13, 1, 99, 1};
    std::vector<uint32_t> after = {1, 2, 1, 1, 0, 2, 0};
    ASSERT_TRUE(set_communities(&G, before));
    G.NormalizeCommunities();
    ASSERT_TRUE(community_check(G, after));
  }
}
// Invalid AddEdge() calls are expected to terminate the process.
TEST(Graph, AddEdge) {
  comdata::Graph edgeless = GenRandomUnweightedGraph(5, 0);

  // Endpoint outside of the graph.
  ASSERT_DEATH({ edgeless.AddEdge(1, 5, 7); }, "");

  // Adding an edge that is already present.
  edgeless.AddEdge(1, 2, 1);
  ASSERT_DEATH({ edgeless.AddEdge(1, 2, 7); }, "");

  // Negative and zero weights.
  ASSERT_DEATH({ edgeless.AddEdge(2, 3, -7); }, "");
  ASSERT_DEATH({ edgeless.AddEdge(3, 4, 0); }, "");
}
// Degree() is compared against hand-computed values on several small graphs.
// The expected vectors count a self-loop once.
TEST(Graph, Degrees) {
  {
    // Graph without edges.
    comdata::Graph G = GenRandomUnweightedGraph(5, 0);
    std::vector<uint32_t> expected = {0, 0, 0, 0, 0};
    ASSERT_TRUE(degree_check(G, expected));
  }
  {
    // Chain: (0)--(1)--(2)--(3)--(4)
    comdata::Graph G = BuildGraph(5, {{0, 1, 1},
                                      {1, 2, 1},
                                      {2, 3, 1},
                                      {3, 4, 1}});
    std::vector<uint32_t> expected = {1, 2, 2, 2, 1};
    ASSERT_TRUE(degree_check(G, expected));
  }
  {
    // Tree: 0 is adjacent to 1, 2 and 3; 1 to 4; 2 to 5 and 6.
    comdata::Graph G = BuildGraph(7, {{0, 1, 1},
                                      {0, 2, 1},
                                      {0, 3, 1},
                                      {1, 4, 1},
                                      {2, 5, 1},
                                      {2, 6, 1}});
    std::vector<uint32_t> expected = {3, 2, 3, 1, 1, 1, 1};
    ASSERT_TRUE(degree_check(G, expected));
  }
  {
    // Graph without self-loops; edges:
    // 0-1, 0-2, 0-3, 1-3, 1-4, 2-3, 3-4.
    comdata::Graph G = BuildGraph(5, {{0, 1, 1},
                                      {0, 2, 1},
                                      {0, 3, 1},
                                      {1, 3, 1},
                                      {1, 4, 1},
                                      {2, 3, 1},
                                      {3, 4, 1}});
    std::vector<uint32_t> expected = {3, 3, 2, 4, 2};
    ASSERT_TRUE(degree_check(G, expected));
  }
  {
    // Same edges as above plus self-loops on nodes 1, 2 and 4.
    comdata::Graph G = BuildGraph(5, {{0, 1, 1},
                                      {0, 2, 1},
                                      {0, 3, 1},
                                      {1, 3, 1},
                                      {1, 4, 1},
                                      {2, 3, 1},
                                      {3, 4, 1},
                                      {1, 1, 1},
                                      {2, 2, 2},
                                      {4, 4, 4}});
    std::vector<uint32_t> expected = {3, 4, 3, 4, 3};
    ASSERT_TRUE(degree_check(G, expected));

    // Asking for the degree of a non-existing node.
    ASSERT_DEATH({ G.Degree(5); }, "");
    ASSERT_DEATH({ G.Degree(100); }, "");
  }
}
// IncidentWeight() and TotalWeight() are compared against hand-computed values
// on several small weighted graphs.
TEST(Graph, Weights) {
  {
    // Graph without edges.
    comdata::Graph G = GenRandomUnweightedGraph(5, 0);
    std::vector<double> expected = {0, 0, 0, 0, 0};
    ASSERT_TRUE(inc_w_check(G, expected));
    ASSERT_EQ(G.TotalWeight(), 0);
  }
  {
    // Chain: (0)--(1)--(2)--(3)--(4)
    comdata::Graph G = BuildGraph(5, {{0, 1, 0.1},
                                      {1, 2, 0.5},
                                      {2, 3, 2.3},
                                      {3, 4, 4.2}});
    std::vector<double> expected = {0.1, 0.6, 2.8, 6.5, 4.2};
    ASSERT_TRUE(inc_w_check(G, expected));
    ASSERT_NEAR(G.TotalWeight(), 7.1, 1e-6);
  }
  {
    // Tree: 0 is adjacent to 1, 2 and 3; 1 to 4; 2 to 5 and 6.
    comdata::Graph G = BuildGraph(7, {{0, 1, 1.3},
                                      {0, 2, 0.2},
                                      {0, 3, 1},
                                      {1, 4, 3.2},
                                      {2, 5, 4.2},
                                      {2, 6, 0.7}});
    std::vector<double> expected = {2.5, 4.5, 5.1, 1, 3.2, 4.2, 0.7};
    ASSERT_TRUE(inc_w_check(G, expected));
    EXPECT_NEAR(G.TotalWeight(), 10.6, 1e-6);
  }
  {
    // Graph without self-loops; edges:
    // 0-1, 0-2, 0-3, 1-3, 1-4, 2-3, 3-4.
    comdata::Graph G = BuildGraph(5, {{0, 1, 0.1},
                                      {0, 2, 0.2},
                                      {0, 3, 0.3},
                                      {1, 3, 0.4},
                                      {1, 4, 0.5},
                                      {2, 3, 0.6},
                                      {3, 4, 0.7}});
    std::vector<double> expected = {0.6, 1, 0.8, 2, 1.2};
    ASSERT_TRUE(inc_w_check(G, expected));
    EXPECT_NEAR(G.TotalWeight(), 2.8, 1e-6);
  }
  {
    // Same edges as above plus self-loops on nodes 1, 2 and 4.
    comdata::Graph G = BuildGraph(5, {{0, 1, 0.1},
                                      {0, 2, 0.2},
                                      {0, 3, 0.3},
                                      {1, 3, 0.4},
                                      {1, 4, 0.5},
                                      {2, 3, 0.6},
                                      {3, 4, 0.7},
                                      {1, 1, 0.8},
                                      {2, 2, 0.9},
                                      {4, 4, 1}});
    std::vector<double> expected = {0.6, 1.8, 1.7, 2, 2.2};
    ASSERT_TRUE(inc_w_check(G, expected));
    EXPECT_NEAR(G.TotalWeight(), 5.5, 1e-6);

    // Asking for the incident weight of a non-existing node.
    ASSERT_DEATH({ G.IncidentWeight(5); }, "");
    ASSERT_DEATH({ G.IncidentWeight(100); }, "");
  }
}
// Modularity() is compared against precomputed values for fixed community
// assignments on several small weighted graphs.
//
// Every community assignment is asserted to succeed: set_communities() returns
// false without touching the graph when the vector length does not match the
// node count, and the modularity check would otherwise run against the wrong
// assignment.
TEST(Graph, Modularity) {
  // Graph without edges.
  comdata::Graph G = GenRandomUnweightedGraph(5, 0);
  ASSERT_EQ(G.Modularity(), 0);

  // Chain: (0)--(1)--(2)--(3)--(4)
  G = BuildGraph(5, {{0, 1, 0.1},
                     {1, 2, 0.5},
                     {2, 3, 2.3},
                     {3, 4, 4.2}});
  std::vector<uint32_t> c = {0, 1, 1, 2, 2};
  ASSERT_TRUE(set_communities(&G, c));
  EXPECT_NEAR(G.Modularity(), 0.37452886332076973, 1e-6);

  // Tree: 0 is adjacent to 1, 2 and 3; 1 to 4; 2 to 5 and 6.
  G = BuildGraph(7, {{0, 1, 1.3},
                     {0, 2, 0.2},
                     {0, 3, 1},
                     {1, 4, 3.2},
                     {2, 5, 4.2},
                     {2, 6, 0.7}});
  c = {0, 0, 1, 0, 0, 1, 2};
  ASSERT_TRUE(set_communities(&G, c));
  EXPECT_NEAR(G.Modularity(), 0.6945087219651122, 1e-6);

  // Graph without self-loops; edges:
  // 0-1, 0-2, 0-3, 1-3, 1-4, 2-3, 3-4.
  G = BuildGraph(5, {{0, 1, 0.1},
                     {0, 2, 0.2},
                     {0, 3, 0.3},
                     {1, 3, 0.4},
                     {1, 4, 0.5},
                     {2, 3, 0.6},
                     {3, 4, 0.7}});
  c = {0, 1, 1, 1, 1};
  ASSERT_TRUE(set_communities(&G, c));
  EXPECT_NEAR(G.Modularity(), 0.32653061224489793, 1e-6);

  // Same edges as above plus self-loops on nodes 1, 2 and 4.
  G = BuildGraph(5, {{0, 1, 0.1},
                     {0, 2, 0.2},
                     {0, 3, 0.3},
                     {1, 3, 0.4},
                     {1, 4, 0.5},
                     {2, 3, 0.6},
                     {3, 4, 0.7},
                     {1, 1, 0.8},
                     {2, 2, 0.9},
                     {4, 4, 1}});
  c = {0, 0, 0, 0, 1};
  ASSERT_TRUE(set_communities(&G, c));
  EXPECT_NEAR(G.Modularity(), 0.2754545454545455, 1e-6);
}

View File

@ -0,0 +1,32 @@
#include "utils.hpp"

#include <random>
#include <stdexcept>
// Creates a graph with `nodes` nodes and adds every (first, second, third)
// tuple of `edges` to it through Graph::AddEdge, in the order given.
comdata::Graph BuildGraph(
    uint32_t nodes, std::vector<std::tuple<uint32_t, uint32_t, double>> edges) {
  comdata::Graph graph(nodes);
  for (auto it = edges.begin(); it != edges.end(); ++it) {
    graph.AddEdge(std::get<0>(*it), std::get<1>(*it), std::get<2>(*it));
  }
  return graph;
}
// Generates a random graph with `nodes` nodes and exactly `edges` distinct
// edges, each of weight 1 and without self-loops. The generator is seeded from
// the current time, so consecutive calls produce different graphs.
//
// Throws std::invalid_argument when `edges` exceeds nodes * (nodes - 1) / 2,
// the number of distinct node pairs; rejection sampling could never finish
// for such a request.
comdata::Graph GenRandomUnweightedGraph(uint32_t nodes, uint32_t edges) {
  // Computed in 64 bits so that large node counts cannot overflow.
  const uint64_t max_edges =
      nodes == 0 ? 0 : static_cast<uint64_t>(nodes) * (nodes - 1) / 2;
  if (edges > max_edges) {
    throw std::invalid_argument(
        "GenRandomUnweightedGraph: more edges requested than distinct node "
        "pairs exist");
  }

  std::set<std::tuple<uint32_t, uint32_t, double>> E;
  // edges > 0 implies nodes >= 2 here, so `nodes - 1` cannot wrap around.
  if (edges > 0) {
    auto seed =
        std::chrono::high_resolution_clock::now().time_since_epoch().count();
    std::mt19937 rng(seed);
    std::uniform_int_distribution<uint32_t> dist(0, nodes - 1);
    while (E.size() < edges) {
      // Keep the endpoints in the distribution's own unsigned type.
      uint32_t u = dist(rng);
      uint32_t v = dist(rng);
      if (u == v) continue;  // no self-loops
      // Store each undirected edge with the smaller endpoint first, so that
      // the set rejects duplicates regardless of draw order.
      if (u > v) std::swap(u, v);
      E.emplace(u, v, 1.0);
    }
  }
  return BuildGraph(nodes, std::vector<std::tuple<uint32_t, uint32_t, double>>(
                               E.begin(), E.end()));
}

View File

@ -0,0 +1,33 @@
#pragma once
#include <chrono>
#include <random>
#include <set>
#include <tuple>
#include "data_structures/graph.hpp"
/// Stopwatch that records std::chrono::steady_clock::now() on construction.
/// Elapsed() only reads the stored start time, hence the class is described
/// as threadsafe.
class Timer {
 private:
  using Clock = std::chrono::steady_clock;

 public:
  Timer() : start_time_{Clock::now()} {}

  /// Returns the time passed since construction, converted to `TDuration`
  /// (seconds stored as a double by default).
  template <typename TDuration = std::chrono::duration<double>>
  TDuration Elapsed() const {
    const auto since_start = Clock::now() - start_time_;
    return std::chrono::duration_cast<TDuration>(since_start);
  }

 private:
  Clock::time_point start_time_;
};
/// Builds the graph from a given number of nodes and a list of edges.
/// Nodes should be 0-indexed and each edge should be provided only once.
/// Each tuple is (first endpoint, second endpoint, weight).
comdata::Graph BuildGraph(
uint32_t nodes, std::vector<std::tuple<uint32_t, uint32_t, double>> edges);
/// Generates a random undirected graph with a given number of nodes and edges.
/// The generated graph is not picked out of a uniform distribution. All weights
/// are the same and equal to one. Edges are distinct and never connect a node
/// to itself, so callers must not request more than nodes * (nodes - 1) / 2
/// edges.
comdata::Graph GenRandomUnweightedGraph(uint32_t nodes, uint32_t edges);