diff --git a/include/data_structures/bloom/basic_bloom_filter.hpp b/include/data_structures/bloom/bloom_filter.hpp similarity index 75% rename from include/data_structures/bloom/basic_bloom_filter.hpp rename to include/data_structures/bloom/bloom_filter.hpp index 99e26f0aa..33da0df80 100644 --- a/include/data_structures/bloom/basic_bloom_filter.hpp +++ b/include/data_structures/bloom/bloom_filter.hpp @@ -2,8 +2,17 @@ #include <iostream> #include <vector> +/* + Implementation of a generic Bloom Filter. + + Read more about bloom filters here: + http://en.wikipedia.org/wiki/Bloom_filter + http://www.jasondavies.com/bloomfilter/ +*/ + +// Type specifies the type of data stored template <class Type, int BucketSize = 8> -class BasicBloomFilter { +class BloomFilter { private: using HashFunction = std::function<uint64_t(const Type&)>; using CompresionFunction = std::function<int(uint64_t)>; @@ -28,13 +37,13 @@ class BasicBloomFilter { } public: - BasicBloomFilter(std::vector<HashFunction> funcs, - CompresionFunction compression = {}) + BloomFilter(std::vector<HashFunction> funcs, + CompresionFunction compression = {}) : hashes_(funcs) { if (!compression) - compression_ = std::bind(&BasicBloomFilter::default_compression, this, + compression_ = std::bind(&BloomFilter::default_compression, this, std::placeholders::_1); - else + else compression_ = compression; buckets.resize(hashes_.size()); diff --git a/tests/benchmark/data_structures/bloom/basic_bloom_filter.cpp b/tests/benchmark/data_structures/bloom/basic_bloom_filter.cpp index 231993fcb..36a74506d 100644 --- a/tests/benchmark/data_structures/bloom/basic_bloom_filter.cpp +++ b/tests/benchmark/data_structures/bloom/basic_bloom_filter.cpp @@ -1,7 +1,7 @@ #include <random> #include <thread> -#include "data_structures/bloom/basic_bloom_filter.hpp" +#include "data_structures/bloom/bloom_filter.hpp" #include "logging/default.hpp" #include "logging/streams/stdout.hpp" #include "utils/command_line/arguments.hpp" @@ -14,7 +14,7 @@ using utils::random::StringGenerator; using StringHashFunction = std::function<uint64_t(const std::string&)>; template <class Type, int Size> -static void TestBloom(benchmark::State& state, BasicBloomFilter<Type, Size>* +static void TestBloom(benchmark::State& state, BloomFilter<Type, Size>* bloom, const std::vector<Type>& elements) { while(state.KeepRunning()) { for (int start = 0; start < state.range(0); start++) @@ -46,7 +46,7 @@ int main(int argc, char** argv) { hash1, hash2 }; - BasicBloomFilter<std::string, 128> bloom(funcs); + BloomFilter<std::string, 128> bloom(funcs); benchmark::RegisterBenchmark("SimpleBloomFilter Benchmark Test", BM_Bloom, &bloom, elements) diff --git a/tests/benchmark/data_structures/concurrent/concurrent_bloom_map.cpp b/tests/benchmark/data_structures/concurrent/concurrent_bloom_map.cpp index 17df96bb5..f305d8b20 100644 --- a/tests/benchmark/data_structures/concurrent/concurrent_bloom_map.cpp +++ b/tests/benchmark/data_structures/concurrent/concurrent_bloom_map.cpp @@ -1,7 +1,7 @@ #include <random> #include <thread> -#include "data_structures/bloom/basic_bloom_filter.hpp" +#include "data_structures/bloom/bloom_filter.hpp" #include "data_structures/concurrent/concurrent_bloom_map.hpp" #include "logging/default.hpp" #include "logging/streams/stdout.hpp" @@ -50,21 +50,6 @@ static void InsertValue(benchmark::State& state, ConcurrentBloomMap<K, V, F>* ma state.SetComplexityN(state.range(0)); } -/* - ConcurrentMap Deletion Benchmark Test -template <class K, class V> -static void DeleteValue(benchmark::State& state, ConcurrentBloomMap<K, V, F>* map, - const std::vector<std::pair<K, V>> elements) { - while (state.KeepRunning()) { - auto accessor = map->access(); - for (int start = 0; start < state.range(0); start++) { - accessor.remove(elements[start].first); - } - } - state.SetComplexityN(state.range(0)); -} -*/ - /* ConcurrentMap Contains Benchmark Test */ @@ -83,12 +68,6 @@ auto BM_InsertValue = [](benchmark::State& state, auto* map, auto& elements) { InsertValue(state, map, elements); }; -/* -auto BM_DeleteValue = [](benchmark::State& state, auto* map, auto elements) { - DeleteValue(state, map, elements); -}; -*/ - auto BM_ContainsValue = [](benchmark::State& state, auto* map, auto elements) { ContainsValue(state, map, elements); }; @@ -154,12 +133,12 @@ int main(int argc, char** argv) { hash1, hash2 }; - BasicBloomFilter<std::string, 128> bloom_filter_(funcs); + BloomFilter<std::string, 128> bloom_filter_(funcs); // maps used for testing //ConcurrentBloomMap<int, int> ii_map; //ConcurrentBloomMap<int, std::string> is_map; - using Filter = BasicBloomFilter<std::string, 128>; + using Filter = BloomFilter<std::string, 128>; ConcurrentBloomMap<std::string, int, Filter > si_map(bloom_filter_); ConcurrentBloomMap<std::string, std::string, Filter> ss_map(bloom_filter_); @@ -171,21 +150,6 @@ ss_map(bloom_filter_); auto ss_elems = utils::random::generate_vector(pssg, MAX_ELEMENTS); /* insertion Tests */ - /* - benchmark::RegisterBenchmark("InsertValue[Int, Int]", BM_InsertValue, &ii_map, - ii_elems) - ->RangeMultiplier(MULTIPLIER) - ->Range(1, MAX_ELEMENTS) - ->Complexity(benchmark::oN) - ->Threads(THREADS); - - benchmark::RegisterBenchmark("InsertValue[Int, String]", BM_InsertValue, - &is_map, is_elems) - ->RangeMultiplier(MULTIPLIER) - ->Range(1, MAX_ELEMENTS) - ->Complexity(benchmark::oN) - ->Threads(THREADS); - */ benchmark::RegisterBenchmark("InsertValue[String, Int]", BM_InsertValue, &si_map, si_elems) ->RangeMultiplier(MULTIPLIER) @@ -201,22 +165,6 @@ ss_map(bloom_filter_); ->Threads(THREADS); // Contains Benchmark Tests - - /* - benchmark::RegisterBenchmark("ContainsValue[Int, Int]", BM_ContainsValue, - &ii_map, ii_elems) - ->RangeMultiplier(MULTIPLIER) - ->Range(1, MAX_ELEMENTS) - ->Complexity(benchmark::oN) - ->Threads(THREADS); - - benchmark::RegisterBenchmark("ContainsValue[Int, String]", BM_ContainsValue, - &is_map, is_elems) - ->RangeMultiplier(MULTIPLIER) - ->Range(1, MAX_ELEMENTS) - ->Complexity(benchmark::oN) - ->Threads(THREADS); - */ benchmark::RegisterBenchmark("ContainsValue[String, Int]", BM_ContainsValue, &si_map, si_elems) ->RangeMultiplier(MULTIPLIER) @@ -231,38 +179,6 @@ ss_map(bloom_filter_); ->Complexity(benchmark::oN) ->Threads(THREADS); - // Deletion Banchamark Tests - /* - - benchmark::RegisterBenchmark("DeleteValue[Int, Int]", BM_DeleteValue, &ii_map, - ii_elems) - ->RangeMultiplier(MULTIPLIER) - ->Range(1, MAX_ELEMENTS) - ->Complexity(benchmark::oN) - ->Threads(THREADS); - - benchmark::RegisterBenchmark("DeleteValue[Int, String]", BM_DeleteValue, - &is_map, is_elems) - ->RangeMultiplier(MULTIPLIER) - ->Range(1, MAX_ELEMENTS) - ->Complexity(benchmark::oN) - ->Threads(THREADS); - - benchmark::RegisterBenchmark("DeleteValue[String, Int]", BM_DeleteValue, - &si_map, si_elems) - ->RangeMultiplier(MULTIPLIER) - ->Range(1, MAX_ELEMENTS) - ->Complexity(benchmark::oN) - ->Threads(THREADS); - - benchmark::RegisterBenchmark("DeleteValue[String, String]", BM_DeleteValue, - &ss_map, ss_elems) - ->RangeMultiplier(MULTIPLIER) - ->Range(1, MAX_ELEMENTS) - ->Complexity(benchmark::oN) - ->Threads(THREADS); - */ - benchmark::Initialize(&argc, argv); benchmark::RunSpecifiedBenchmarks(); diff --git a/tests/unit/basic_bloom_filter.cpp b/tests/unit/basic_bloom_filter.cpp index 77484ca08..ac4df7fc2 100644 --- a/tests/unit/basic_bloom_filter.cpp +++ b/tests/unit/basic_bloom_filter.cpp @@ -4,14 +4,14 @@ #include "utils/command_line/arguments.hpp" #include "utils/hashing/fnv64.hpp" -#include "data_structures/bloom/basic_bloom_filter.hpp" +#include "data_structures/bloom/bloom_filter.hpp" #pragma clang diagnostic push #pragma clang diagnostic ignored "-Wwritable-strings" using StringHashFunction = std::function<uint64_t(const std::string&)>; -TEST_CASE("BasicBloomFilter Test") { +TEST_CASE("BloomFilter Test") { StringHashFunction hash1 = fnv64<std::string>; StringHashFunction hash2 = fnv1a64<std::string>; @@ -22,10 +22,10 @@ TEST_CASE("BasicBloomFilter Test") { hash1, hash2 }; - BasicBloomFilter<std::string, 64> bloom(funcs); + BloomFilter<std::string, 64> bloom(funcs); std::string test = "test"; - std::string kifla = "pizda"; + std::string kifla = "kifla"; std::cout << hash1(test) << std::endl; std::cout << hash2(test) << std::endl;