Merge branch 'T115' into dev
This commit is contained in:
commit
a215e185c6
67
include/data_structures/bloom/bloom_filter.hpp
Normal file
67
include/data_structures/bloom/bloom_filter.hpp
Normal file
@ -0,0 +1,67 @@
|
||||
#include <bitset>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <iostream>
#include <utility>
#include <vector>
|
||||
|
||||
/*
|
||||
Implementation of a generic Bloom Filter.
|
||||
|
||||
Read more about bloom filters here:
|
||||
http://en.wikipedia.org/wiki/Bloom_filter
|
||||
http://www.jasondavies.com/bloomfilter/
|
||||
*/
|
||||
|
||||
// Type specifies the type of data stored
|
||||
/**
 * Bloom filter backed by a fixed-size std::bitset.
 *
 * Every inserted element sets one bit per hash function; an element is
 * reported as present only when all of its bits are set. False positives are
 * therefore possible, false negatives are not.
 *
 * @tparam Type        type of the elements handed to the hash functions
 * @tparam BucketSize  number of bits in the filter (must be positive)
 */
template <class Type, int BucketSize = 8>
class BloomFilter {
  static_assert(BucketSize > 0, "BloomFilter needs at least one bucket");

 private:
  using HashFunction = std::function<uint64_t(const Type&)>;
  using CompresionFunction = std::function<int(uint64_t)>;

  std::bitset<BucketSize> filter_;
  std::vector<HashFunction> hashes_;
  CompresionFunction compression_;

  // Default hash -> bucket mapping. Static on purpose: the stored callable
  // must not capture `this`, otherwise a copied filter would keep pointing at
  // the object it was copied from.
  static int default_compression(uint64_t hash) {
    return static_cast<int>(hash % static_cast<uint64_t>(BucketSize));
  }

  // Bucket index of `data` under one hash function. The compression result is
  // wrapped into [0, BucketSize) so that a user-supplied compression function
  // can never index outside the bitset. In-range results are left unchanged.
  std::size_t bucket_of(const HashFunction& hash, const Type& data) const {
    const long long size = BucketSize;
    const long long raw = compression_(hash(data));
    return static_cast<std::size_t>(((raw % size) + size) % size);
  }

  // Debug helper: prints the given bucket indices on a single line.
  static void print_buckets(const std::vector<uint64_t>& buckets) {
    for (const auto bucket : buckets) {
      std::cout << bucket << " ";
    }
    std::cout << std::endl;
  }

 public:
  /**
   * @param funcs        hash functions; each contributes one bit per element
   * @param compression  maps a 64-bit hash to a bucket index; when empty,
   *                     `hash % BucketSize` is used
   */
  BloomFilter(std::vector<HashFunction> funcs,
              CompresionFunction compression = {})
      : hashes_(std::move(funcs)),
        compression_(compression
                         ? std::move(compression)
                         : CompresionFunction(default_compression)) {}

  /**
   * @return false when `data` was definitely never inserted, true when it may
   *         have been. With no hash functions configured every element is
   *         reported as present.
   */
  bool contains(const Type& data) const {
    for (const auto& hash : hashes_) {
      if (!filter_.test(bucket_of(hash, data))) return false;
    }
    return true;
  }

  /** Marks `data` as present by setting one bit per hash function. */
  void insert(const Type& data) {
    for (const auto& hash : hashes_) {
      filter_.set(bucket_of(hash, data));
    }
  }
};
|
36
include/data_structures/concurrent/concurrent_bloom_map.hpp
Normal file
36
include/data_structures/concurrent/concurrent_bloom_map.hpp
Normal file
@ -0,0 +1,36 @@
|
||||
#pragma once
|
||||
|
||||
#include "data_structures/concurrent/common.hpp"
|
||||
#include "data_structures/concurrent/skiplist.hpp"
|
||||
#include "data_structures/concurrent/concurrent_map.hpp"
|
||||
|
||||
|
||||
using std::pair;
|
||||
|
||||
template <class Key, class Value, class BloomFilter>
|
||||
class ConcurrentBloomMap {
|
||||
using item_t = Item<Key, Value>;
|
||||
using list_it = typename SkipList<item_t>::Iterator;
|
||||
|
||||
private:
|
||||
ConcurrentMap<Key, Value> map_;
|
||||
BloomFilter filter_;
|
||||
|
||||
public:
|
||||
ConcurrentBloomMap(BloomFilter filter) : filter_(filter) {}
|
||||
|
||||
std::pair<list_it, bool> insert(const Key &key, const Value &data) {
|
||||
filter_.insert(key);
|
||||
|
||||
auto accessor = std::move(map_.access());
|
||||
|
||||
return accessor.insert(key, data);
|
||||
}
|
||||
|
||||
bool contains(const Key &key) {
|
||||
if (!filter_.contains(key)) return false;
|
||||
|
||||
auto accessor = map_.access();
|
||||
return accessor.contains(key);
|
||||
}
|
||||
};
|
59
tests/benchmark/data_structures/bloom/basic_bloom_filter.cpp
Normal file
59
tests/benchmark/data_structures/bloom/basic_bloom_filter.cpp
Normal file
@ -0,0 +1,59 @@
|
||||
#include <random>
|
||||
#include <thread>
|
||||
|
||||
#include "data_structures/bloom/bloom_filter.hpp"
|
||||
#include "logging/default.hpp"
|
||||
#include "logging/streams/stdout.hpp"
|
||||
#include "utils/command_line/arguments.hpp"
|
||||
#include "utils/hashing/fnv64.hpp"
|
||||
#include "utils/random/generator.h"
|
||||
|
||||
#include "benchmark/benchmark_api.h"
|
||||
|
||||
using utils::random::StringGenerator;
|
||||
using StringHashFunction = std::function<uint64_t(const std::string&)>;
|
||||
|
||||
template <class Type, int Size>
|
||||
static void TestBloom(benchmark::State& state, BloomFilter<Type, Size>*
|
||||
bloom, const std::vector<Type>& elements) {
|
||||
while(state.KeepRunning()) {
|
||||
for (int start = 0; start < state.range(0); start++)
|
||||
if (start % 2) bloom->contains(elements[start]);
|
||||
else bloom->insert(elements[start]);
|
||||
}
|
||||
state.SetComplexityN(state.range(0));
|
||||
}
|
||||
|
||||
auto BM_Bloom = [](benchmark::State& state, auto* bloom, const auto& elements) {
|
||||
TestBloom(state, bloom, elements);
|
||||
};
|
||||
|
||||
// Placeholder for command-line handling; currently ignores its arguments.
void parse_args(int argc, char** argv) {
  (void)argc;
  (void)argv;
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
logging::init_async();
|
||||
logging::log->pipe(std::make_unique<Stdout>());
|
||||
|
||||
parse_args(argc, argv);
|
||||
|
||||
StringGenerator generator(4);
|
||||
|
||||
auto elements = utils::random::generate_vector(generator, 1 << 16);
|
||||
|
||||
StringHashFunction hash1 = fnv64<std::string>;
|
||||
StringHashFunction hash2 = fnv1a64<std::string>;
|
||||
std::vector<StringHashFunction> funcs = {
|
||||
hash1, hash2
|
||||
};
|
||||
|
||||
BloomFilter<std::string, 128> bloom(funcs);
|
||||
|
||||
benchmark::RegisterBenchmark("SimpleBloomFilter Benchmark Test", BM_Bloom,
|
||||
&bloom, elements)
|
||||
->RangeMultiplier(2)
|
||||
->Range(1, 1 << 16)
|
||||
->Complexity(benchmark::oN);
|
||||
|
||||
benchmark::Initialize(&argc, argv);
|
||||
benchmark::RunSpecifiedBenchmarks();
|
||||
}
|
@ -0,0 +1,186 @@
|
||||
#include <random>
|
||||
#include <thread>
|
||||
|
||||
#include "data_structures/bloom/bloom_filter.hpp"
|
||||
#include "data_structures/concurrent/concurrent_bloom_map.hpp"
|
||||
#include "logging/default.hpp"
|
||||
#include "logging/streams/stdout.hpp"
|
||||
#include "utils/command_line/arguments.hpp"
|
||||
#include "utils/hashing/fnv64.hpp"
|
||||
#include "utils/random/generator.h"
|
||||
|
||||
#include "benchmark/benchmark_api.h"
|
||||
|
||||
/*
|
||||
ConcurrentBloomMap Benchmark Test:
|
||||
- tests time of Insertion and Contains operations
|
||||
|
||||
- benchmarking time per operation
|
||||
|
||||
- test run ConcurrentMap with the following keys and values:
|
||||
- <int,int>
|
||||
- <int, string>
|
||||
- <string, int>
|
||||
- <string, string>
|
||||
*/
|
||||
|
||||
using utils::random::NumberGenerator;
|
||||
using utils::random::PairGenerator;
|
||||
using utils::random::StringGenerator;
|
||||
using StringHashFunction = std::function<uint64_t(const std::string&)>;
|
||||
|
||||
using IntegerGenerator = NumberGenerator<std::uniform_int_distribution<int>,
|
||||
std::default_random_engine, int>;
|
||||
|
||||
// Global arguments
|
||||
// Fixed benchmark sizing.
int MAX_ELEMENTS = 1 << 18;
int MULTIPLIER = 2;

// Filled in by parse_arguments(); zero until then.
int THREADS = 0;
int RANGE_START = 0;
int RANGE_END = 0;
int STRING_LENGTH = 0;
|
||||
|
||||
/*
|
||||
ConcurrentBloomMap Insertion Benchmark Test
|
||||
*/
|
||||
template <class K, class V, class F>
|
||||
static void InsertValue(benchmark::State& state, ConcurrentBloomMap<K, V, F>* map,
|
||||
const std::vector<std::pair<K, V>>& elements) {
|
||||
while (state.KeepRunning()) {
|
||||
for (int start = 0; start < state.range(0); start++) {
|
||||
map->insert(elements[start].first, elements[start].second);
|
||||
}
|
||||
}
|
||||
state.SetComplexityN(state.range(0));
|
||||
}
|
||||
|
||||
/*
|
||||
ConcurrentBloomMap Contains Benchmark Test
|
||||
*/
|
||||
template <class K, class V, class F>
|
||||
static void ContainsValue(benchmark::State& state, ConcurrentBloomMap<K, V, F>* map,
|
||||
const std::vector<std::pair<K, V>> elements) {
|
||||
while (state.KeepRunning()) {
|
||||
for (int start = 0; start < state.range(0); start++) {
|
||||
map->contains(elements[start].first);
|
||||
}
|
||||
}
|
||||
state.SetComplexityN(state.range(0));
|
||||
}
|
||||
|
||||
auto BM_InsertValue = [](benchmark::State& state, auto* map, auto& elements) {
|
||||
InsertValue(state, map, elements);
|
||||
};
|
||||
|
||||
// Generic adapter handed to RegisterBenchmark: forwards the benchmark state,
// the map pointer and the element vector to ContainsValue. The elements are
// bound by const reference so the vector is not copied on every invocation.
auto BM_ContainsValue = [](benchmark::State& state, auto* map,
                           const auto& elements) {
  ContainsValue(state, map, elements);
};
|
||||
|
||||
/*
|
||||
Commandline Argument Parsing
|
||||
|
||||
Arguments:
|
||||
* Integer Range Minimum
|
||||
-start number
|
||||
|
||||
* Integer Range Maximum
|
||||
-end number
|
||||
|
||||
* Number of threads
|
||||
-threads number
|
||||
|
||||
* Random String length
|
||||
-string-length number
|
||||
*/
|
||||
/**
 * Reads the benchmark configuration from the command line into the global
 * RANGE_START, RANGE_END, THREADS and STRING_LENGTH variables.
 *
 * Defaults: -start 0, -end 1000000000, -threads 1, -string-length 128.
 */
void parse_arguments(int argc, char** argv) {
  REGISTER_ARGS(argc, argv);

  RANGE_START = GET_ARG("-start", "0").get_int();
  RANGE_END = GET_ARG("-end", "1000000000").get_int();

  // Cap the requested thread count at what the hardware reports.
  // hardware_concurrency() may return 0 when the value is not computable; in
  // that case the request is used as-is instead of being clamped to 0.
  const int requested_threads = GET_ARG("-threads", "1").get_int();
  const int hardware_threads =
      static_cast<int>(std::thread::hardware_concurrency());
  THREADS = requested_threads;
  if (hardware_threads > 0) THREADS = std::min(THREADS, hardware_threads);
  // Never hand a non-positive thread count to the benchmark registration.
  if (THREADS < 1) THREADS = 1;

  STRING_LENGTH =
      ProgramArguments::instance().get_arg("-string-length", "128").get_int();
}
|
||||
|
||||
int main(int argc, char** argv) {
|
||||
logging::init_async();
|
||||
logging::log->pipe(std::make_unique<Stdout>());
|
||||
|
||||
parse_arguments(argc, argv);
|
||||
|
||||
StringGenerator sg(STRING_LENGTH);
|
||||
IntegerGenerator ig(RANGE_START, RANGE_END);
|
||||
|
||||
/*
|
||||
Creates RandomGenerators, ConcurentMaps and Random Element Vectors for the
|
||||
following use cases:
|
||||
|
||||
Map elements contain keys and value for:
|
||||
<int, int>,
|
||||
<int, string>
|
||||
<string, int>
|
||||
<string, string>
|
||||
*/
|
||||
|
||||
// random generators for tests
|
||||
PairGenerator<IntegerGenerator, IntegerGenerator> piig(&ig, &ig);
|
||||
PairGenerator<StringGenerator, StringGenerator> pssg(&sg, &sg);
|
||||
PairGenerator<StringGenerator, IntegerGenerator> psig(&sg, &ig);
|
||||
PairGenerator<IntegerGenerator, StringGenerator> pisg(&ig, &sg);
|
||||
|
||||
StringHashFunction hash1 = fnv64<std::string>;
|
||||
StringHashFunction hash2 = fnv1a64<std::string>;
|
||||
std::vector<StringHashFunction> funcs = {
|
||||
hash1, hash2
|
||||
};
|
||||
|
||||
BloomFilter<std::string, 128> bloom_filter_(funcs);
|
||||
|
||||
// maps used for testing
|
||||
//ConcurrentBloomMap<int, int> ii_map;
|
||||
//ConcurrentBloomMap<int, std::string> is_map;
|
||||
using Filter = BloomFilter<std::string, 128>;
|
||||
ConcurrentBloomMap<std::string, int, Filter > si_map(bloom_filter_);
|
||||
ConcurrentBloomMap<std::string, std::string, Filter>
|
||||
ss_map(bloom_filter_);
|
||||
|
||||
// random elements for testing
|
||||
//auto ii_elems = utils::random::generate_vector(piig, MAX_ELEMENTS);
|
||||
//auto is_elems = utils::random::generate_vector(pisg, MAX_ELEMENTS);
|
||||
auto si_elems = utils::random::generate_vector(psig, MAX_ELEMENTS);
|
||||
auto ss_elems = utils::random::generate_vector(pssg, MAX_ELEMENTS);
|
||||
|
||||
/* insertion Tests */
|
||||
benchmark::RegisterBenchmark("InsertValue[String, Int]", BM_InsertValue,
|
||||
&si_map, si_elems)
|
||||
->RangeMultiplier(MULTIPLIER)
|
||||
->Range(1, MAX_ELEMENTS)
|
||||
->Complexity(benchmark::oN)
|
||||
->Threads(THREADS);
|
||||
|
||||
benchmark::RegisterBenchmark("InsertValue[String, String]", BM_InsertValue,
|
||||
&ss_map, ss_elems)
|
||||
->RangeMultiplier(MULTIPLIER)
|
||||
->Range(1, MAX_ELEMENTS)
|
||||
->Complexity(benchmark::oN)
|
||||
->Threads(THREADS);
|
||||
|
||||
// Contains Benchmark Tests
|
||||
benchmark::RegisterBenchmark("ContainsValue[String, Int]", BM_ContainsValue,
|
||||
&si_map, si_elems)
|
||||
->RangeMultiplier(MULTIPLIER)
|
||||
->Range(1, MAX_ELEMENTS)
|
||||
->Complexity(benchmark::oN)
|
||||
->Threads(THREADS);
|
||||
|
||||
benchmark::RegisterBenchmark("ContainsValue[String, String]",
|
||||
BM_ContainsValue, &ss_map, ss_elems)
|
||||
->RangeMultiplier(MULTIPLIER)
|
||||
->Range(1, MAX_ELEMENTS)
|
||||
->Complexity(benchmark::oN)
|
||||
->Threads(THREADS);
|
||||
|
||||
benchmark::Initialize(&argc, argv);
|
||||
benchmark::RunSpecifiedBenchmarks();
|
||||
|
||||
return 0;
|
||||
}
|
45
tests/unit/basic_bloom_filter.cpp
Normal file
45
tests/unit/basic_bloom_filter.cpp
Normal file
@ -0,0 +1,45 @@
|
||||
#define CATCH_CONFIG_MAIN
|
||||
#include "catch.hpp"
|
||||
|
||||
#include "utils/command_line/arguments.hpp"
|
||||
#include "utils/hashing/fnv64.hpp"
|
||||
|
||||
#include "data_structures/bloom/bloom_filter.hpp"
|
||||
|
||||
#pragma clang diagnostic push
|
||||
#pragma clang diagnostic ignored "-Wwritable-strings"
|
||||
|
||||
using StringHashFunction = std::function<uint64_t(const std::string&)>;
|
||||
|
||||
// Checks the basic Bloom filter contract with two FNV hashes over 64 buckets:
// an empty filter reports nothing as present, and every inserted element is
// reported as present afterwards (no false negatives).
TEST_CASE("BloomFilter Test") {
  StringHashFunction hash1 = fnv64<std::string>;
  StringHashFunction hash2 = fnv1a64<std::string>;

  std::vector<StringHashFunction> funcs = {hash1, hash2};

  BloomFilter<std::string, 64> bloom(funcs);

  std::string test = "test";
  std::string kifla = "kifla";

  // No bit is set yet, so no element can be reported as present.
  REQUIRE_FALSE(bloom.contains(test));
  REQUIRE_FALSE(bloom.contains(kifla));

  bloom.insert(test);
  REQUIRE(bloom.contains(test));
  // contains(kifla) is deliberately not checked here: a false positive is a
  // legal answer for an element that was not inserted.

  bloom.insert(kifla);
  REQUIRE(bloom.contains(kifla));
  // Earlier insertions must stay visible.
  REQUIRE(bloom.contains(test));
}
|
||||
|
||||
#pragma clang diagnostic pop
|
Loading…
Reference in New Issue
Block a user