Add PoolResource benchmarking in execution and skiplist

Summary:
With a pool allocator, lookups in STL set and map are up to 50% faster.
This is probably due to contiguous memory of pooled objects, i.e. nodes
of those containers. In some cases, the lookup outperforms the SkipList.
Insertions are also faster, though not as dramatically, up to 30%. This
does make a significant difference when the STL containers are used in a
single thread as they outperform the SkipList significantly.

Reviewers: mferencevic, ipaljak

Reviewed By: mferencevic

Subscribers: pullbot

Differential Revision: https://phabricator.memgraph.io/D2326
This commit is contained in:
Teon Banek 2019-08-26 14:23:38 +02:00
parent 1fa9d7752c
commit 023538c19c
2 changed files with 197 additions and 1 deletions

View File

@ -31,6 +31,15 @@ class NewDeleteResource final {
void Reset() {}
};
// Supplies a pooled memory resource to the benchmark templates.
// Pools hold up to 128 blocks per chunk, with blocks up to 4 KiB
// (see utils::PoolResource for the parameter semantics).
class PoolResource final {
 public:
  // Memory resource handed to the benchmarked operators.
  utils::MemoryResource *get() { return &memory_; }
  // Frees all pooled chunks between benchmark iterations.
  void Reset() { memory_.Release(); }

 private:
  utils::PoolResource memory_{128, 4 * 1024};
};
static void AddVertices(database::GraphDb *db, int vertex_count) {
auto dba = db->Access();
for (int i = 0; i < vertex_count; i++) dba.InsertVertex();
@ -125,6 +134,10 @@ BENCHMARK_TEMPLATE(Distinct, MonotonicBufferResource)
->Range(1024, 1U << 21U)
->Unit(benchmark::kMicrosecond);
// Distinct operator benchmark backed by the pooled memory resource.
BENCHMARK_TEMPLATE(Distinct, PoolResource)
    ->Range(1024, 1U << 21U)
    ->Unit(benchmark::kMicrosecond);
static query::plan::ExpandVariable MakeExpandVariable(
query::EdgeAtom::Type expand_type, query::SymbolTable *symbol_table) {
auto input_symbol = symbol_table->CreateSymbol("input", false);
@ -178,6 +191,10 @@ BENCHMARK_TEMPLATE(ExpandVariable, MonotonicBufferResource)
->Ranges({{1, 1U << 5U}, {512, 1U << 13U}})
->Unit(benchmark::kMicrosecond);
// ExpandVariable benchmark backed by the pooled memory resource.
BENCHMARK_TEMPLATE(ExpandVariable, PoolResource)
    ->Ranges({{1, 1U << 5U}, {512, 1U << 13U}})
    ->Unit(benchmark::kMicrosecond);
template <class TMemory>
// NOLINTNEXTLINE(google-runtime-references)
static void ExpandBfs(benchmark::State &state) {
@ -214,6 +231,10 @@ BENCHMARK_TEMPLATE(ExpandBfs, MonotonicBufferResource)
->Range(512, 1U << 19U)
->Unit(benchmark::kMicrosecond);
// ExpandBfs benchmark backed by the pooled memory resource.
BENCHMARK_TEMPLATE(ExpandBfs, PoolResource)
    ->Range(512, 1U << 19U)
    ->Unit(benchmark::kMicrosecond);
template <class TMemory>
// NOLINTNEXTLINE(google-runtime-references)
static void ExpandShortest(benchmark::State &state) {
@ -255,6 +276,10 @@ BENCHMARK_TEMPLATE(ExpandShortest, MonotonicBufferResource)
->Range(512, 1U << 20U)
->Unit(benchmark::kMicrosecond);
// ExpandShortest benchmark backed by the pooled memory resource.
BENCHMARK_TEMPLATE(ExpandShortest, PoolResource)
    ->Range(512, 1U << 20U)
    ->Unit(benchmark::kMicrosecond);
template <class TMemory>
// NOLINTNEXTLINE(google-runtime-references)
static void ExpandWeightedShortest(benchmark::State &state) {
@ -300,6 +325,10 @@ BENCHMARK_TEMPLATE(ExpandWeightedShortest, MonotonicBufferResource)
->Range(512, 1U << 20U)
->Unit(benchmark::kMicrosecond);
// ExpandWeightedShortest benchmark backed by the pooled memory resource.
BENCHMARK_TEMPLATE(ExpandWeightedShortest, PoolResource)
    ->Range(512, 1U << 20U)
    ->Unit(benchmark::kMicrosecond);
template <class TMemory>
// NOLINTNEXTLINE(google-runtime-references)
static void Accumulate(benchmark::State &state) {
@ -340,6 +369,10 @@ BENCHMARK_TEMPLATE(Accumulate, MonotonicBufferResource)
->Ranges({{4, 1U << 7U}, {512, 1U << 13U}})
->Unit(benchmark::kMicrosecond);
// Accumulate benchmark backed by the pooled memory resource.
BENCHMARK_TEMPLATE(Accumulate, PoolResource)
    ->Ranges({{4, 1U << 7U}, {512, 1U << 13U}})
    ->Unit(benchmark::kMicrosecond);
template <class TMemory>
// NOLINTNEXTLINE(google-runtime-references)
static void Aggregate(benchmark::State &state) {
@ -393,6 +426,10 @@ BENCHMARK_TEMPLATE(Aggregate, MonotonicBufferResource)
->Ranges({{4, 1U << 7U}, {512, 1U << 13U}})
->Unit(benchmark::kMicrosecond);
// Aggregate benchmark backed by the pooled memory resource.
BENCHMARK_TEMPLATE(Aggregate, PoolResource)
    ->Ranges({{4, 1U << 7U}, {512, 1U << 13U}})
    ->Unit(benchmark::kMicrosecond);
template <class TMemory>
// NOLINTNEXTLINE(google-runtime-references)
static void OrderBy(benchmark::State &state) {
@ -439,6 +476,10 @@ BENCHMARK_TEMPLATE(OrderBy, MonotonicBufferResource)
->Ranges({{4, 1U << 7U}, {512, 1U << 13U}})
->Unit(benchmark::kMicrosecond);
// OrderBy benchmark backed by the pooled memory resource.
BENCHMARK_TEMPLATE(OrderBy, PoolResource)
    ->Ranges({{4, 1U << 7U}, {512, 1U << 13U}})
    ->Unit(benchmark::kMicrosecond);
template <class TMemory>
// NOLINTNEXTLINE(google-runtime-references)
static void Unwind(benchmark::State &state) {
@ -478,4 +519,8 @@ BENCHMARK_TEMPLATE(Unwind, MonotonicBufferResource)
->Ranges({{4, 1U << 7U}, {512, 1U << 13U}})
->Unit(benchmark::kMicrosecond);
// Unwind benchmark backed by the pooled memory resource.
BENCHMARK_TEMPLATE(Unwind, PoolResource)
    ->Ranges({{4, 1U << 7U}, {512, 1U << 13U}})
    ->Unit(benchmark::kMicrosecond);

// Expands to main(), running every registered benchmark above.
BENCHMARK_MAIN();

View File

@ -5,6 +5,7 @@
#include <benchmark/benchmark.h>
#include "utils/memory.hpp"
#include "utils/skip_list.hpp"
#include "utils/spin_lock.hpp"
@ -80,6 +81,42 @@ BENCHMARK_REGISTER_F(StdSetInsertFixture, Insert)
->Unit(benchmark::kNanosecond)
->UseRealTime();
// Benchmarks std::set insertion when the set's nodes are allocated from a
// utils::PoolResource instead of the default new/delete resource.
class StdSetWithPoolAllocatorInsertFixture : public benchmark::Fixture {
 protected:
  // Fix: the original declared `protected:` twice; one specifier suffices.
  void SetUp(const benchmark::State &state) override {
    // Only the first thread resets the container. The pool itself is not
    // Release()d here, so freed blocks are reused across repetitions —
    // presumably intentional for this benchmark; TODO confirm.
    if (state.thread_index == 0) {
      container.clear();
    }
  }

  utils::PoolResource memory_{256U /* max_blocks_per_chunk */,
                              1024U /* max_block_size */,
                              utils::NewDeleteResource()};
  // Set whose node allocations are served by memory_.
  std::set<uint64_t, std::less<>, utils::Allocator<uint64_t>> container{
      &memory_};
  // Serializes container access across benchmark threads.
  utils::SpinLock lock;
};
// Each thread inserts uniformly random keys under the shared spin lock;
// items processed counts only the keys that were newly inserted.
BENCHMARK_DEFINE_F(StdSetWithPoolAllocatorInsertFixture, Insert)
(benchmark::State &state) {
  // Deterministic per-thread key stream, seeded by the thread index.
  std::mt19937 rng(state.thread_index);
  std::uniform_int_distribution<uint64_t> key_dist(0, kMaxNum);
  uint64_t inserted = 0;
  while (state.KeepRunning()) {
    std::lock_guard<utils::SpinLock> guard(lock);
    const bool is_new = container.insert(key_dist(rng)).second;
    if (is_new) ++inserted;
  }
  state.SetItemsProcessed(inserted);
}
// Run with 1..kThreadsNum contending threads; report wall-clock nanoseconds.
BENCHMARK_REGISTER_F(StdSetWithPoolAllocatorInsertFixture, Insert)
    ->ThreadRange(1, kThreadsNum)
    ->Unit(benchmark::kNanosecond)
    ->UseRealTime();
///////////////////////////////////////////////////////////////////////////////
// utils::SkipList set Find
///////////////////////////////////////////////////////////////////////////////
@ -154,6 +191,44 @@ BENCHMARK_REGISTER_F(StdSetFindFixture, Find)
->Unit(benchmark::kNanosecond)
->UseRealTime();
// Benchmarks std::set lookups when the set's nodes are allocated from a
// utils::PoolResource instead of the default new/delete resource.
class StdSetWithPoolAllocatorFindFixture : public benchmark::Fixture {
 protected:
  // Fix: the original declared `protected:` twice; one specifier suffices.
  void SetUp(const benchmark::State &state) override {
    // Populate once: only the first thread fills the set, and only while it
    // is still empty (SetUp runs again for each thread-count variation).
    if (state.thread_index == 0 && container.empty()) {
      for (uint64_t i = 0; i < kMaxNum; ++i) {
        container.insert(i);
      }
    }
  }

  utils::PoolResource memory_{256U /* max_blocks_per_chunk */,
                              1024U /* max_block_size */,
                              utils::NewDeleteResource()};
  // Set whose node allocations are served by memory_.
  std::set<uint64_t, std::less<>, utils::Allocator<uint64_t>> container{
      &memory_};
  // Serializes container access across benchmark threads.
  utils::SpinLock lock;
};
// Each thread looks up uniformly random keys under the shared spin lock;
// items processed counts only the lookups that found an element.
BENCHMARK_DEFINE_F(StdSetWithPoolAllocatorFindFixture, Find)
(benchmark::State &state) {
  // Deterministic per-thread key stream, seeded by the thread index.
  std::mt19937 rng(state.thread_index);
  std::uniform_int_distribution<uint64_t> key_dist(0, kMaxNum);
  uint64_t hits = 0;
  while (state.KeepRunning()) {
    std::lock_guard<utils::SpinLock> guard(lock);
    const auto it = container.find(key_dist(rng));
    if (it != container.end()) ++hits;
  }
  state.SetItemsProcessed(hits);
}
// Run with 1..kThreadsNum contending threads; report wall-clock nanoseconds.
BENCHMARK_REGISTER_F(StdSetWithPoolAllocatorFindFixture, Find)
    ->ThreadRange(1, kThreadsNum)
    ->Unit(benchmark::kNanosecond)
    ->UseRealTime();
///////////////////////////////////////////////////////////////////////////////
// Map tests common
///////////////////////////////////////////////////////////////////////////////
@ -178,7 +253,7 @@ class SkipListMapInsertFixture : public benchmark::Fixture {
protected:
void SetUp(const benchmark::State &state) override {
if (state.thread_index == 0) {
list = utils::SkipList<MapObject>();;
list = utils::SkipList<MapObject>();
}
}
@ -239,6 +314,43 @@ BENCHMARK_REGISTER_F(StdMapInsertFixture, Insert)
->Unit(benchmark::kNanosecond)
->UseRealTime();
// Benchmarks std::map insertion when the map's nodes are allocated from a
// utils::PoolResource instead of the default new/delete resource.
class StdMapWithPoolAllocatorInsertFixture : public benchmark::Fixture {
 protected:
  // Fixes: the original declared `protected:` twice, and reset the map with
  // `container = {};` — clear() does the same and matches the set fixture.
  void SetUp(const benchmark::State &state) override {
    // Only the first thread resets the container; the pool keeps its chunks
    // so freed blocks are reused across repetitions.
    if (state.thread_index == 0) {
      container.clear();
    }
  }

  utils::PoolResource memory_{256U /* max_blocks_per_chunk */,
                              1024U /* max_block_size */,
                              utils::NewDeleteResource()};
  // Map whose node allocations are served by memory_.
  std::map<uint64_t, uint64_t, std::less<>,
           utils::Allocator<std::pair<uint64_t, uint64_t>>>
      container{&memory_};
  // Serializes container access across benchmark threads.
  utils::SpinLock lock;
};
// Each thread inserts {random key, 0} pairs under the shared spin lock;
// items processed counts only the keys that were newly inserted.
BENCHMARK_DEFINE_F(StdMapWithPoolAllocatorInsertFixture, Insert)
(benchmark::State &state) {
  // Deterministic per-thread key stream, seeded by the thread index.
  std::mt19937 rng(state.thread_index);
  std::uniform_int_distribution<uint64_t> key_dist(0, kMaxNum);
  uint64_t inserted = 0;
  while (state.KeepRunning()) {
    std::lock_guard<utils::SpinLock> guard(lock);
    const bool is_new = container.insert({key_dist(rng), 0}).second;
    if (is_new) ++inserted;
  }
  state.SetItemsProcessed(inserted);
}
// Run with 1..kThreadsNum contending threads; report wall-clock nanoseconds.
BENCHMARK_REGISTER_F(StdMapWithPoolAllocatorInsertFixture, Insert)
    ->ThreadRange(1, kThreadsNum)
    ->Unit(benchmark::kNanosecond)
    ->UseRealTime();
///////////////////////////////////////////////////////////////////////////////
// utils::SkipList map Find
///////////////////////////////////////////////////////////////////////////////
@ -313,4 +425,43 @@ BENCHMARK_REGISTER_F(StdMapFindFixture, Find)
->Unit(benchmark::kNanosecond)
->UseRealTime();
// Benchmarks std::map lookups when the map's nodes are allocated from a
// utils::PoolResource instead of the default new/delete resource.
class StdMapWithPoolAllocatorFindFixture : public benchmark::Fixture {
 protected:
  // Fix: the original declared `protected:` twice; one specifier suffices.
  void SetUp(const benchmark::State &state) override {
    // Populate once: only the first thread fills the map, and only while it
    // is still empty (SetUp runs again for each thread-count variation).
    if (state.thread_index == 0 && container.empty()) {
      for (uint64_t i = 0; i < kMaxNum; ++i) {
        container.insert({i, 0});
      }
    }
  }

  utils::PoolResource memory_{256U /* max_blocks_per_chunk */,
                              1024U /* max_block_size */,
                              utils::NewDeleteResource()};
  // Map whose node allocations are served by memory_.
  std::map<uint64_t, uint64_t, std::less<>,
           utils::Allocator<std::pair<uint64_t, uint64_t>>>
      container{&memory_};
  // Serializes container access across benchmark threads.
  utils::SpinLock lock;
};
// Each thread looks up uniformly random keys under the shared spin lock;
// items processed counts only the lookups that found an element.
BENCHMARK_DEFINE_F(StdMapWithPoolAllocatorFindFixture, Find)
(benchmark::State &state) {
  // Deterministic per-thread key stream, seeded by the thread index.
  std::mt19937 rng(state.thread_index);
  std::uniform_int_distribution<uint64_t> key_dist(0, kMaxNum);
  uint64_t hits = 0;
  while (state.KeepRunning()) {
    std::lock_guard<utils::SpinLock> guard(lock);
    const auto it = container.find(key_dist(rng));
    if (it != container.end()) ++hits;
  }
  state.SetItemsProcessed(hits);
}
// Run with 1..kThreadsNum contending threads; report wall-clock nanoseconds.
BENCHMARK_REGISTER_F(StdMapWithPoolAllocatorFindFixture, Find)
    ->ThreadRange(1, kThreadsNum)
    ->Unit(benchmark::kNanosecond)
    ->UseRealTime();
BENCHMARK_MAIN();