Merge branch 'T115' into dev

This commit is contained in:
sale 2016-12-12 09:21:41 +00:00
commit a215e185c6
5 changed files with 393 additions and 0 deletions

View File

@ -0,0 +1,67 @@
#include <bitset>
#include <cstddef>
#include <cstdint>
#include <functional>
#include <iostream>
#include <utility>
#include <vector>
/*
Implementation of a generic Bloom Filter.
Read more about bloom filters here:
http://en.wikipedia.org/wiki/Bloom_filter
http://www.jasondavies.com/bloomfilter/
*/
/**
 * Fixed-size Bloom filter backed by a std::bitset.
 *
 * Every inserted element is run through each configured hash function; the
 * 64-bit hash is then reduced to a bucket index by the compression function
 * and the corresponding bit is set. A lookup reports "present" only when all
 * of those bits are set, so false positives are possible while false
 * negatives are not.
 *
 * The class performs no internal synchronization.
 *
 * @tparam Type        type of the stored data
 * @tparam BucketSize  number of bits (buckets) in the filter, must be > 0
 */
template <class Type, int BucketSize = 8>
class BloomFilter {
  static_assert(BucketSize > 0, "BloomFilter requires at least one bucket");

 private:
  using HashFunction = std::function<uint64_t(const Type&)>;
  using CompressionFunction = std::function<int(uint64_t)>;

  std::bitset<BucketSize> filter_;
  std::vector<HashFunction> hashes_;
  CompressionFunction compression_;

  // Default compression: plain modulo over the number of buckets.
  // Static on purpose: it needs no object state, so the stored callable does
  // not capture `this` and stays valid when the filter is copied or moved.
  static int default_compression(uint64_t hash) {
    return static_cast<int>(hash % static_cast<uint64_t>(BucketSize));
  }

  // Maps `data` to the bucket selected by one hash function. A negative
  // result of the compression function wraps to a huge unsigned value and is
  // therefore rejected by the bounds-checked bitset accessors below.
  std::size_t bucket_of(const HashFunction& hash, const Type& data) const {
    return static_cast<std::size_t>(compression_(hash(data)));
  }

  // Debugging helper: prints the given bucket indices on a single line.
  static void print_buckets(const std::vector<uint64_t>& buckets) {
    for (const auto bucket : buckets) {
      std::cout << bucket << " ";
    }
    std::cout << std::endl;
  }

 public:
  /**
   * @param funcs        hash functions applied to every element
   * @param compression  maps a 64-bit hash to a bucket index in
   *                     [0, BucketSize); when empty, `hash % BucketSize`
   *                     is used
   */
  BloomFilter(std::vector<HashFunction> funcs,
              CompressionFunction compression = {})
      : hashes_(std::move(funcs)) {
    if (compression)
      compression_ = std::move(compression);
    else
      compression_ = &BloomFilter::default_compression;
  }

  /**
   * @return false if `data` was definitely never inserted, true if it may
   *         have been (always true when no hash functions are configured).
   * @throws std::out_of_range if the compression function yields an index
   *         outside [0, BucketSize).
   */
  bool contains(const Type& data) const {
    for (const auto& hash : hashes_) {
      if (!filter_.test(bucket_of(hash, data))) return false;
    }
    return true;
  }

  /**
   * Marks `data` as present by setting the bit selected by every hash.
   *
   * @throws std::out_of_range if the compression function yields an index
   *         outside [0, BucketSize).
   */
  void insert(const Type& data) {
    for (const auto& hash : hashes_) {
      filter_.set(bucket_of(hash, data));
    }
  }
};

View File

@ -0,0 +1,36 @@
#pragma once
#include "data_structures/concurrent/common.hpp"
#include "data_structures/concurrent/skiplist.hpp"
#include "data_structures/concurrent/concurrent_map.hpp"
using std::pair;
/**
 * Map that consults a Bloom filter before touching the underlying
 * ConcurrentMap, so lookups of keys the filter has never seen return without
 * accessing the map.
 *
 * NOTE(review): filter_ is read and written here without any locking visible
 * in this class; whether that is safe under concurrent use depends on the
 * BloomFilter type supplied - confirm before sharing an instance across
 * threads.
 *
 * @tparam Key          key type, also the element type fed to the filter
 * @tparam Value        mapped value type
 * @tparam BloomFilter  filter type providing insert(key) and contains(key)
 */
template <class Key, class Value, class BloomFilter>
class ConcurrentBloomMap {
  using item_t = Item<Key, Value>;
  using list_it = typename SkipList<item_t>::Iterator;

 private:
  ConcurrentMap<Key, Value> map_;
  BloomFilter filter_;

 public:
  // Takes the filter by value and moves it into place, so callers handing
  // over a temporary pay for no extra copy.
  ConcurrentBloomMap(BloomFilter filter) : filter_(std::move(filter)) {}

  /**
   * Records `key` in the filter, then inserts the pair into the map.
   *
   * @return whatever the map accessor's insert returns (iterator + flag).
   */
  std::pair<list_it, bool> insert(const Key &key, const Value &data) {
    filter_.insert(key);
    // access() already yields a temporary; wrapping it in std::move would
    // only inhibit copy elision.
    auto accessor = map_.access();
    return accessor.insert(key, data);
  }

  /**
   * @return false when the filter rules the key out; otherwise the result of
   *         the map lookup.
   */
  bool contains(const Key &key) {
    if (!filter_.contains(key)) return false;
    auto accessor = map_.access();
    return accessor.contains(key);
  }
};

View File

@ -0,0 +1,59 @@
#include <random>
#include <thread>
#include "data_structures/bloom/bloom_filter.hpp"
#include "logging/default.hpp"
#include "logging/streams/stdout.hpp"
#include "utils/command_line/arguments.hpp"
#include "utils/hashing/fnv64.hpp"
#include "utils/random/generator.h"
#include "benchmark/benchmark_api.h"
using utils::random::StringGenerator;
using StringHashFunction = std::function<uint64_t(const std::string&)>;
/**
 * Benchmark body for the Bloom filter.
 *
 * For every benchmark iteration, walks the first state.range(0) entries of
 * `elements`: even indices are inserted into the filter, odd indices are
 * looked up. `elements` must therefore hold at least state.range(0) entries.
 *
 * @param state     benchmark state; range(0) is the number of operations
 * @param bloom     filter under test (shared across iterations)
 * @param elements  input data
 */
template <class Type, int Size>
static void TestBloom(benchmark::State& state, BloomFilter<Type, Size>* bloom,
                      const std::vector<Type>& elements) {
  while (state.KeepRunning()) {
    for (int start = 0; start < state.range(0); start++) {
      if (start % 2) {
        // Keep the lookup result observable so the call cannot be elided.
        bool found = bloom->contains(elements[start]);
        benchmark::DoNotOptimize(found);
      } else {
        bloom->insert(elements[start]);
      }
    }
  }
  state.SetComplexityN(state.range(0));
}
auto BM_Bloom = [](benchmark::State& state, auto* bloom, const auto& elements) {
TestBloom(state, bloom, elements);
};
// Placeholder: no command-line options are read by this function.
void parse_args(int /*argc*/, char** /*argv*/) {}
int main(int argc, char** argv) {
logging::init_async();
logging::log->pipe(std::make_unique<Stdout>());
parse_args(argc, argv);
StringGenerator generator(4);
auto elements = utils::random::generate_vector(generator, 1 << 16);
StringHashFunction hash1 = fnv64<std::string>;
StringHashFunction hash2 = fnv1a64<std::string>;
std::vector<StringHashFunction> funcs = {
hash1, hash2
};
BloomFilter<std::string, 128> bloom(funcs);
benchmark::RegisterBenchmark("SimpleBloomFilter Benchmark Test", BM_Bloom,
&bloom, elements)
->RangeMultiplier(2)
->Range(1, 1 << 16)
->Complexity(benchmark::oN);
benchmark::Initialize(&argc, argv);
benchmark::RunSpecifiedBenchmarks();
}

View File

@ -0,0 +1,186 @@
#include <random>
#include <thread>
#include "data_structures/bloom/bloom_filter.hpp"
#include "data_structures/concurrent/concurrent_bloom_map.hpp"
#include "logging/default.hpp"
#include "logging/streams/stdout.hpp"
#include "utils/command_line/arguments.hpp"
#include "utils/hashing/fnv64.hpp"
#include "utils/random/generator.h"
#include "benchmark/benchmark_api.h"
/*
  ConcurrentBloomMap Benchmark Test:
    - tests time of Insertion and Contains operations
    - benchmarking time per operation
    - test runs ConcurrentBloomMap with the following keys and values:
        - <string, int>
        - <string, string>
      (the <int, int> and <int, string> cases are currently commented out)
*/
using utils::random::NumberGenerator;
using utils::random::PairGenerator;
using utils::random::StringGenerator;
using StringHashFunction = std::function<uint64_t(const std::string&)>;
using IntegerGenerator = NumberGenerator<std::uniform_int_distribution<int>,
std::default_random_engine, int>;
// Global arguments

// Largest benchmark input size and the factor between consecutive sizes.
int MAX_ELEMENTS = 1 << 18;
int MULTIPLIER = 2;

// Assigned from the command line in parse_arguments(); zero until then.
int THREADS = 0;
int RANGE_START = 0;
int RANGE_END = 0;
int STRING_LENGTH = 0;
/*
  ConcurrentBloomMap Insertion Benchmark

  Each benchmark iteration inserts the first state.range(0) key/value pairs
  of `elements` into `map`; `elements` must hold at least that many entries.
*/
template <class K, class V, class F>
static void InsertValue(benchmark::State& state,
                        ConcurrentBloomMap<K, V, F>* map,
                        const std::vector<std::pair<K, V>>& elements) {
  while (state.KeepRunning()) {
    for (int idx = 0; idx < state.range(0); ++idx) {
      const auto& entry = elements[idx];
      map->insert(entry.first, entry.second);
    }
  }
  state.SetComplexityN(state.range(0));
}
/*
  ConcurrentBloomMap Contains Benchmark

  Each benchmark iteration looks up the keys of the first state.range(0)
  entries of `elements` in `map`; `elements` must hold at least that many
  entries. The vector is taken by const reference so that invoking the
  benchmark does not copy the whole input.
*/
template <class K, class V, class F>
static void ContainsValue(benchmark::State& state,
                          ConcurrentBloomMap<K, V, F>* map,
                          const std::vector<std::pair<K, V>>& elements) {
  while (state.KeepRunning()) {
    for (int start = 0; start < state.range(0); start++) {
      // Keep the lookup result observable so the call cannot be elided.
      bool found = map->contains(elements[start].first);
      benchmark::DoNotOptimize(found);
    }
  }
  state.SetComplexityN(state.range(0));
}
auto BM_InsertValue = [](benchmark::State& state, auto* map, auto& elements) {
InsertValue(state, map, elements);
};
// Generic adapter for benchmark::RegisterBenchmark: passes its arguments on
// to ContainsValue. The elements are bound by const reference; taking them
// by value would copy the entire input vector on every invocation.
auto BM_ContainsValue = [](benchmark::State& state, auto* map,
                           const auto& elements) {
  ContainsValue(state, map, elements);
};
/*
  Commandline Argument Parsing

  Arguments:
   * Integer Range Minimum
      -start number        (default 0)
   * Integer Range Maximum
      -end number          (default 1000000000)
   * Number of threads
      -threads number      (default 1, capped at the hardware concurrency
                            when that is known, never less than 1)
   * Random String length
      -string-length number (default 128)

  Stores the results in the globals RANGE_START, RANGE_END, THREADS and
  STRING_LENGTH.
*/
void parse_arguments(int argc, char** argv) {
  REGISTER_ARGS(argc, argv);

  RANGE_START = GET_ARG("-start", "0").get_int();
  RANGE_END = GET_ARG("-end", "1000000000").get_int();

  const int requested_threads = GET_ARG("-threads", "1").get_int();
  // hardware_concurrency() returns 0 when the value cannot be determined;
  // in that case do not cap at all rather than ending up with 0 threads.
  const int hardware_threads =
      static_cast<int>(std::thread::hardware_concurrency());
  const int capped_threads =
      hardware_threads > 0 ? std::min(requested_threads, hardware_threads)
                           : requested_threads;
  // The thread count is later handed to ->Threads(); keep it positive even
  // when the user passes 0 or a negative number.
  THREADS = std::max(1, capped_threads);

  STRING_LENGTH =
      ProgramArguments::instance().get_arg("-string-length", "128").get_int();
}
// Entry point of the ConcurrentBloomMap benchmark: initialises logging,
// reads the command-line arguments into the globals, builds the random test
// data and the maps under test, registers the insert and contains
// benchmarks and runs them.
int main(int argc, char** argv) {
logging::init_async();
logging::log->pipe(std::make_unique<Stdout>());
parse_arguments(argc, argv);
// Base generators; both are configured from the parsed arguments.
StringGenerator sg(STRING_LENGTH);
IntegerGenerator ig(RANGE_START, RANGE_END);
/*
Creates RandomGenerators, ConcurrentBloomMaps and Random Element Vectors for
the following use cases:
Map elements contain keys and value for:
<int, int>,
<int, string>
<string, int>
<string, string>
Only the two string-keyed cases are active below; the int-keyed maps and
element vectors are commented out.
*/
// random generators for tests
// (piig and pisg are currently unused because the int-keyed cases are
// disabled)
PairGenerator<IntegerGenerator, IntegerGenerator> piig(&ig, &ig);
PairGenerator<StringGenerator, StringGenerator> pssg(&sg, &sg);
PairGenerator<StringGenerator, IntegerGenerator> psig(&sg, &ig);
PairGenerator<IntegerGenerator, StringGenerator> pisg(&ig, &sg);
// Two string hash functions used by the Bloom filter.
StringHashFunction hash1 = fnv64<std::string>;
StringHashFunction hash2 = fnv1a64<std::string>;
std::vector<StringHashFunction> funcs = {
hash1, hash2
};
// Prototype filter with 128 buckets; each map below receives its own copy
// because ConcurrentBloomMap takes the filter by value.
BloomFilter<std::string, 128> bloom_filter_(funcs);
// maps used for testing
//ConcurrentBloomMap<int, int> ii_map;
//ConcurrentBloomMap<int, std::string> is_map;
using Filter = BloomFilter<std::string, 128>;
ConcurrentBloomMap<std::string, int, Filter > si_map(bloom_filter_);
ConcurrentBloomMap<std::string, std::string, Filter>
ss_map(bloom_filter_);
// random elements for testing
//auto ii_elems = utils::random::generate_vector(piig, MAX_ELEMENTS);
//auto is_elems = utils::random::generate_vector(pisg, MAX_ELEMENTS);
auto si_elems = utils::random::generate_vector(psig, MAX_ELEMENTS);
auto ss_elems = utils::random::generate_vector(pssg, MAX_ELEMENTS);
/* insertion Tests */
// Input sizes run from 1 to MAX_ELEMENTS in steps of factor MULTIPLIER,
// each executed with THREADS threads.
benchmark::RegisterBenchmark("InsertValue[String, Int]", BM_InsertValue,
&si_map, si_elems)
->RangeMultiplier(MULTIPLIER)
->Range(1, MAX_ELEMENTS)
->Complexity(benchmark::oN)
->Threads(THREADS);
benchmark::RegisterBenchmark("InsertValue[String, String]", BM_InsertValue,
&ss_map, ss_elems)
->RangeMultiplier(MULTIPLIER)
->Range(1, MAX_ELEMENTS)
->Complexity(benchmark::oN)
->Threads(THREADS);
// Contains Benchmark Tests
// These are registered after the insert benchmarks and operate on the same
// map objects. NOTE(review): presumably they rely on the inserts having run
// first so the lookups hit populated maps - confirm the execution order.
benchmark::RegisterBenchmark("ContainsValue[String, Int]", BM_ContainsValue,
&si_map, si_elems)
->RangeMultiplier(MULTIPLIER)
->Range(1, MAX_ELEMENTS)
->Complexity(benchmark::oN)
->Threads(THREADS);
benchmark::RegisterBenchmark("ContainsValue[String, String]",
BM_ContainsValue, &ss_map, ss_elems)
->RangeMultiplier(MULTIPLIER)
->Range(1, MAX_ELEMENTS)
->Complexity(benchmark::oN)
->Threads(THREADS);
// Let the benchmark library consume its own flags, then run everything
// that was registered above.
benchmark::Initialize(&argc, argv);
benchmark::RunSpecifiedBenchmarks();
return 0;
}

View File

@ -0,0 +1,45 @@
#define CATCH_CONFIG_MAIN
#include "catch.hpp"
#include "utils/command_line/arguments.hpp"
#include "utils/hashing/fnv64.hpp"
#include "data_structures/bloom/bloom_filter.hpp"
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wwritable-strings"
using StringHashFunction = std::function<uint64_t(const std::string&)>;
// Verifies the basic Bloom filter contract with two FNV hash functions:
// an empty filter contains nothing, and every inserted element is reported
// as present afterwards (no false negatives).
TEST_CASE("BloomFilter Test") {
  StringHashFunction hash1 = fnv64<std::string>;
  StringHashFunction hash2 = fnv1a64<std::string>;

  std::vector<StringHashFunction> funcs = {
    hash1, hash2
  };

  BloomFilter<std::string, 64> bloom(funcs);

  std::string test = "test";
  std::string kifla = "kifla";

  // Diagnostic output of the raw hash values.
  std::cout << hash1(test) << std::endl;
  std::cout << hash2(test) << std::endl;
  std::cout << hash1(kifla) << std::endl;
  std::cout << hash2(kifla) << std::endl;

  // Nothing has been inserted yet, so no bit is set.
  REQUIRE_FALSE(bloom.contains(test));

  bloom.insert(test);
  REQUIRE(bloom.contains(test));

  // "kifla" has not been inserted, but a Bloom filter may legitimately
  // report a false positive here, so the result is only printed.
  std::cout << bloom.contains(kifla) << std::endl;

  bloom.insert(kifla);
  REQUIRE(bloom.contains(kifla));
  // Inserting another element must never remove an earlier one.
  REQUIRE(bloom.contains(test));
}
#pragma clang diagnostic pop