Bloom filter code review changes
This commit is contained in:
parent
9b03fd1f04
commit
52c5159bc0
@ -2,8 +2,17 @@
|
|||||||
#include <iostream>
|
#include <iostream>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
|
||||||
|
/*
|
||||||
|
Implementation of a generic Bloom Filter.
|
||||||
|
|
||||||
|
Read more about bloom filters here:
|
||||||
|
http://en.wikipedia.org/wiki/Bloom_filter
|
||||||
|
http://www.jasondavies.com/bloomfilter/
|
||||||
|
*/
|
||||||
|
|
||||||
|
// Type specifies the type of data stored
|
||||||
template <class Type, int BucketSize = 8>
|
template <class Type, int BucketSize = 8>
|
||||||
class BasicBloomFilter {
|
class BloomFilter {
|
||||||
private:
|
private:
|
||||||
using HashFunction = std::function<uint64_t(const Type&)>;
|
using HashFunction = std::function<uint64_t(const Type&)>;
|
||||||
using CompresionFunction = std::function<int(uint64_t)>;
|
using CompresionFunction = std::function<int(uint64_t)>;
|
||||||
@ -28,13 +37,13 @@ class BasicBloomFilter {
|
|||||||
}
|
}
|
||||||
|
|
||||||
public:
|
public:
|
||||||
BasicBloomFilter(std::vector<HashFunction> funcs,
|
BloomFilter(std::vector<HashFunction> funcs,
|
||||||
CompresionFunction compression = {})
|
CompresionFunction compression = {})
|
||||||
: hashes_(funcs) {
|
: hashes_(funcs) {
|
||||||
if (!compression)
|
if (!compression)
|
||||||
compression_ = std::bind(&BasicBloomFilter::default_compression, this,
|
compression_ = std::bind(&BloomFilter::default_compression, this,
|
||||||
std::placeholders::_1);
|
std::placeholders::_1);
|
||||||
else
|
else
|
||||||
compression_ = compression;
|
compression_ = compression;
|
||||||
|
|
||||||
buckets.resize(hashes_.size());
|
buckets.resize(hashes_.size());
|
@ -1,7 +1,7 @@
|
|||||||
#include <random>
|
#include <random>
|
||||||
#include <thread>
|
#include <thread>
|
||||||
|
|
||||||
#include "data_structures/bloom/basic_bloom_filter.hpp"
|
#include "data_structures/bloom/bloom_filter.hpp"
|
||||||
#include "logging/default.hpp"
|
#include "logging/default.hpp"
|
||||||
#include "logging/streams/stdout.hpp"
|
#include "logging/streams/stdout.hpp"
|
||||||
#include "utils/command_line/arguments.hpp"
|
#include "utils/command_line/arguments.hpp"
|
||||||
@ -14,7 +14,7 @@ using utils::random::StringGenerator;
|
|||||||
using StringHashFunction = std::function<uint64_t(const std::string&)>;
|
using StringHashFunction = std::function<uint64_t(const std::string&)>;
|
||||||
|
|
||||||
template <class Type, int Size>
|
template <class Type, int Size>
|
||||||
static void TestBloom(benchmark::State& state, BasicBloomFilter<Type, Size>*
|
static void TestBloom(benchmark::State& state, BloomFilter<Type, Size>*
|
||||||
bloom, const std::vector<Type>& elements) {
|
bloom, const std::vector<Type>& elements) {
|
||||||
while(state.KeepRunning()) {
|
while(state.KeepRunning()) {
|
||||||
for (int start = 0; start < state.range(0); start++)
|
for (int start = 0; start < state.range(0); start++)
|
||||||
@ -46,7 +46,7 @@ int main(int argc, char** argv) {
|
|||||||
hash1, hash2
|
hash1, hash2
|
||||||
};
|
};
|
||||||
|
|
||||||
BasicBloomFilter<std::string, 128> bloom(funcs);
|
BloomFilter<std::string, 128> bloom(funcs);
|
||||||
|
|
||||||
benchmark::RegisterBenchmark("SimpleBloomFilter Benchmark Test", BM_Bloom,
|
benchmark::RegisterBenchmark("SimpleBloomFilter Benchmark Test", BM_Bloom,
|
||||||
&bloom, elements)
|
&bloom, elements)
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
#include <random>
|
#include <random>
|
||||||
#include <thread>
|
#include <thread>
|
||||||
|
|
||||||
#include "data_structures/bloom/basic_bloom_filter.hpp"
|
#include "data_structures/bloom/bloom_filter.hpp"
|
||||||
#include "data_structures/concurrent/concurrent_bloom_map.hpp"
|
#include "data_structures/concurrent/concurrent_bloom_map.hpp"
|
||||||
#include "logging/default.hpp"
|
#include "logging/default.hpp"
|
||||||
#include "logging/streams/stdout.hpp"
|
#include "logging/streams/stdout.hpp"
|
||||||
@ -50,21 +50,6 @@ static void InsertValue(benchmark::State& state, ConcurrentBloomMap<K, V, F>* ma
|
|||||||
state.SetComplexityN(state.range(0));
|
state.SetComplexityN(state.range(0));
|
||||||
}
|
}
|
||||||
|
|
||||||
/*
|
|
||||||
ConcurrentMap Deletion Benchmark Test
|
|
||||||
template <class K, class V>
|
|
||||||
static void DeleteValue(benchmark::State& state, ConcurrentBloomMap<K, V, F>* map,
|
|
||||||
const std::vector<std::pair<K, V>> elements) {
|
|
||||||
while (state.KeepRunning()) {
|
|
||||||
auto accessor = map->access();
|
|
||||||
for (int start = 0; start < state.range(0); start++) {
|
|
||||||
accessor.remove(elements[start].first);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
state.SetComplexityN(state.range(0));
|
|
||||||
}
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*
|
/*
|
||||||
ConcurrentMap Contains Benchmark Test
|
ConcurrentMap Contains Benchmark Test
|
||||||
*/
|
*/
|
||||||
@ -83,12 +68,6 @@ auto BM_InsertValue = [](benchmark::State& state, auto* map, auto& elements) {
|
|||||||
InsertValue(state, map, elements);
|
InsertValue(state, map, elements);
|
||||||
};
|
};
|
||||||
|
|
||||||
/*
|
|
||||||
auto BM_DeleteValue = [](benchmark::State& state, auto* map, auto elements) {
|
|
||||||
DeleteValue(state, map, elements);
|
|
||||||
};
|
|
||||||
*/
|
|
||||||
|
|
||||||
auto BM_ContainsValue = [](benchmark::State& state, auto* map, auto elements) {
|
auto BM_ContainsValue = [](benchmark::State& state, auto* map, auto elements) {
|
||||||
ContainsValue(state, map, elements);
|
ContainsValue(state, map, elements);
|
||||||
};
|
};
|
||||||
@ -154,12 +133,12 @@ int main(int argc, char** argv) {
|
|||||||
hash1, hash2
|
hash1, hash2
|
||||||
};
|
};
|
||||||
|
|
||||||
BasicBloomFilter<std::string, 128> bloom_filter_(funcs);
|
BloomFilter<std::string, 128> bloom_filter_(funcs);
|
||||||
|
|
||||||
// maps used for testing
|
// maps used for testing
|
||||||
//ConcurrentBloomMap<int, int> ii_map;
|
//ConcurrentBloomMap<int, int> ii_map;
|
||||||
//ConcurrentBloomMap<int, std::string> is_map;
|
//ConcurrentBloomMap<int, std::string> is_map;
|
||||||
using Filter = BasicBloomFilter<std::string, 128>;
|
using Filter = BloomFilter<std::string, 128>;
|
||||||
ConcurrentBloomMap<std::string, int, Filter > si_map(bloom_filter_);
|
ConcurrentBloomMap<std::string, int, Filter > si_map(bloom_filter_);
|
||||||
ConcurrentBloomMap<std::string, std::string, Filter>
|
ConcurrentBloomMap<std::string, std::string, Filter>
|
||||||
ss_map(bloom_filter_);
|
ss_map(bloom_filter_);
|
||||||
@ -171,21 +150,6 @@ ss_map(bloom_filter_);
|
|||||||
auto ss_elems = utils::random::generate_vector(pssg, MAX_ELEMENTS);
|
auto ss_elems = utils::random::generate_vector(pssg, MAX_ELEMENTS);
|
||||||
|
|
||||||
/* insertion Tests */
|
/* insertion Tests */
|
||||||
/*
|
|
||||||
benchmark::RegisterBenchmark("InsertValue[Int, Int]", BM_InsertValue, &ii_map,
|
|
||||||
ii_elems)
|
|
||||||
->RangeMultiplier(MULTIPLIER)
|
|
||||||
->Range(1, MAX_ELEMENTS)
|
|
||||||
->Complexity(benchmark::oN)
|
|
||||||
->Threads(THREADS);
|
|
||||||
|
|
||||||
benchmark::RegisterBenchmark("InsertValue[Int, String]", BM_InsertValue,
|
|
||||||
&is_map, is_elems)
|
|
||||||
->RangeMultiplier(MULTIPLIER)
|
|
||||||
->Range(1, MAX_ELEMENTS)
|
|
||||||
->Complexity(benchmark::oN)
|
|
||||||
->Threads(THREADS);
|
|
||||||
*/
|
|
||||||
benchmark::RegisterBenchmark("InsertValue[String, Int]", BM_InsertValue,
|
benchmark::RegisterBenchmark("InsertValue[String, Int]", BM_InsertValue,
|
||||||
&si_map, si_elems)
|
&si_map, si_elems)
|
||||||
->RangeMultiplier(MULTIPLIER)
|
->RangeMultiplier(MULTIPLIER)
|
||||||
@ -201,22 +165,6 @@ ss_map(bloom_filter_);
|
|||||||
->Threads(THREADS);
|
->Threads(THREADS);
|
||||||
|
|
||||||
// Contains Benchmark Tests
|
// Contains Benchmark Tests
|
||||||
|
|
||||||
/*
|
|
||||||
benchmark::RegisterBenchmark("ContainsValue[Int, Int]", BM_ContainsValue,
|
|
||||||
&ii_map, ii_elems)
|
|
||||||
->RangeMultiplier(MULTIPLIER)
|
|
||||||
->Range(1, MAX_ELEMENTS)
|
|
||||||
->Complexity(benchmark::oN)
|
|
||||||
->Threads(THREADS);
|
|
||||||
|
|
||||||
benchmark::RegisterBenchmark("ContainsValue[Int, String]", BM_ContainsValue,
|
|
||||||
&is_map, is_elems)
|
|
||||||
->RangeMultiplier(MULTIPLIER)
|
|
||||||
->Range(1, MAX_ELEMENTS)
|
|
||||||
->Complexity(benchmark::oN)
|
|
||||||
->Threads(THREADS);
|
|
||||||
*/
|
|
||||||
benchmark::RegisterBenchmark("ContainsValue[String, Int]", BM_ContainsValue,
|
benchmark::RegisterBenchmark("ContainsValue[String, Int]", BM_ContainsValue,
|
||||||
&si_map, si_elems)
|
&si_map, si_elems)
|
||||||
->RangeMultiplier(MULTIPLIER)
|
->RangeMultiplier(MULTIPLIER)
|
||||||
@ -231,38 +179,6 @@ ss_map(bloom_filter_);
|
|||||||
->Complexity(benchmark::oN)
|
->Complexity(benchmark::oN)
|
||||||
->Threads(THREADS);
|
->Threads(THREADS);
|
||||||
|
|
||||||
// Deletion Banchamark Tests
|
|
||||||
/*
|
|
||||||
|
|
||||||
benchmark::RegisterBenchmark("DeleteValue[Int, Int]", BM_DeleteValue, &ii_map,
|
|
||||||
ii_elems)
|
|
||||||
->RangeMultiplier(MULTIPLIER)
|
|
||||||
->Range(1, MAX_ELEMENTS)
|
|
||||||
->Complexity(benchmark::oN)
|
|
||||||
->Threads(THREADS);
|
|
||||||
|
|
||||||
benchmark::RegisterBenchmark("DeleteValue[Int, String]", BM_DeleteValue,
|
|
||||||
&is_map, is_elems)
|
|
||||||
->RangeMultiplier(MULTIPLIER)
|
|
||||||
->Range(1, MAX_ELEMENTS)
|
|
||||||
->Complexity(benchmark::oN)
|
|
||||||
->Threads(THREADS);
|
|
||||||
|
|
||||||
benchmark::RegisterBenchmark("DeleteValue[String, Int]", BM_DeleteValue,
|
|
||||||
&si_map, si_elems)
|
|
||||||
->RangeMultiplier(MULTIPLIER)
|
|
||||||
->Range(1, MAX_ELEMENTS)
|
|
||||||
->Complexity(benchmark::oN)
|
|
||||||
->Threads(THREADS);
|
|
||||||
|
|
||||||
benchmark::RegisterBenchmark("DeleteValue[String, String]", BM_DeleteValue,
|
|
||||||
&ss_map, ss_elems)
|
|
||||||
->RangeMultiplier(MULTIPLIER)
|
|
||||||
->Range(1, MAX_ELEMENTS)
|
|
||||||
->Complexity(benchmark::oN)
|
|
||||||
->Threads(THREADS);
|
|
||||||
*/
|
|
||||||
|
|
||||||
benchmark::Initialize(&argc, argv);
|
benchmark::Initialize(&argc, argv);
|
||||||
benchmark::RunSpecifiedBenchmarks();
|
benchmark::RunSpecifiedBenchmarks();
|
||||||
|
|
||||||
|
@ -4,14 +4,14 @@
|
|||||||
#include "utils/command_line/arguments.hpp"
|
#include "utils/command_line/arguments.hpp"
|
||||||
#include "utils/hashing/fnv64.hpp"
|
#include "utils/hashing/fnv64.hpp"
|
||||||
|
|
||||||
#include "data_structures/bloom/basic_bloom_filter.hpp"
|
#include "data_structures/bloom/bloom_filter.hpp"
|
||||||
|
|
||||||
#pragma clang diagnostic push
|
#pragma clang diagnostic push
|
||||||
#pragma clang diagnostic ignored "-Wwritable-strings"
|
#pragma clang diagnostic ignored "-Wwritable-strings"
|
||||||
|
|
||||||
using StringHashFunction = std::function<uint64_t(const std::string&)>;
|
using StringHashFunction = std::function<uint64_t(const std::string&)>;
|
||||||
|
|
||||||
TEST_CASE("BasicBloomFilter Test") {
|
TEST_CASE("BloomFilter Test") {
|
||||||
StringHashFunction hash1 = fnv64<std::string>;
|
StringHashFunction hash1 = fnv64<std::string>;
|
||||||
StringHashFunction hash2 = fnv1a64<std::string>;
|
StringHashFunction hash2 = fnv1a64<std::string>;
|
||||||
|
|
||||||
@ -22,10 +22,10 @@ TEST_CASE("BasicBloomFilter Test") {
|
|||||||
hash1, hash2
|
hash1, hash2
|
||||||
};
|
};
|
||||||
|
|
||||||
BasicBloomFilter<std::string, 64> bloom(funcs);
|
BloomFilter<std::string, 64> bloom(funcs);
|
||||||
|
|
||||||
std::string test = "test";
|
std::string test = "test";
|
||||||
std::string kifla = "pizda";
|
std::string kifla = "kifla";
|
||||||
|
|
||||||
std::cout << hash1(test) << std::endl;
|
std::cout << hash1(test) << std::endl;
|
||||||
std::cout << hash2(test) << std::endl;
|
std::cout << hash2(test) << std::endl;
|
||||||
|
Loading…
Reference in New Issue
Block a user