diff --git a/utils/random/fast_binomial.hpp b/utils/random/fast_binomial.hpp new file mode 100644 index 000000000..782f00c2a --- /dev/null +++ b/utils/random/fast_binomial.hpp @@ -0,0 +1,59 @@ +#pragma once + +#include "xorshift128plus.hpp" +#include "utils/likely.hpp" + +template +class FastBinomial +{ + // fast binomial draws coin tosses from a single generated random number + // let's draw a random 4 bit number and count trailing ones + // + // 1 0000 -> 1 = + // 2 0001 -> 2 == 8 x = p = 8/16 = 1/2 + // 3 0010 -> 1 = 4 x == p = 4/16 = 1/4 p_total = 15/16 + // 4 0011 -> 3 === 2 x === p = 2/16 = 1/8 + // 5 0100 -> 1 = 1 x ==== p = 1/16 = 1/16 + // 6 0101 -> 2 == -------------------------- + // 7 0110 -> 1 = 1 x ===== p = 1/16 invalid value, retry! + // 8 0111 -> 4 ==== + // 9 1000 -> 1 = + // 10 1001 -> 2 == + // 11 1010 -> 1 = + // 12 1011 -> 3 === + // 13 1100 -> 1 = + // 14 1101 -> 2 == + // 15 1110 -> 1 = + // ------------------ + // 16 1111 -> 5 ===== + + static constexpr uint64_t mask = (1 << N) - 1; + +public: + FastBinomial() = default; + + unsigned operator()() + { + while(true) + { + // couting trailing ones is equal to counting trailing zeros + // since the probability for both is 1/2 and we're going to + // count zeros because they are easier to work with + + // generate a random number + auto x = random() & mask; + + // if we have all zeros, then we have an invalid case and we + // need to generate again + if(UNLIKELY(!x)) + continue; + + // ctzl = count trailing zeros from long + // ^ ^ ^ ^ + return __builtin_ctzl(x) + 1; + } + } + +private: + R random; +}; diff --git a/utils/random/xorshift.hpp b/utils/random/xorshift128plus.hpp similarity index 71% rename from utils/random/xorshift.hpp rename to utils/random/xorshift128plus.hpp index 82b6b8369..1602ffda4 100644 --- a/utils/random/xorshift.hpp +++ b/utils/random/xorshift128plus.hpp @@ -4,28 +4,18 @@ #include #include -namespace xorshift +/* Xorshift algorithm (plus variant) + * + * This is the fastest generator passing BigCrush without systematic failures, + * but due to the relatively short period it is acceptable only for + * applications with a mild amount of parallelism, otherwise, use a + * xorshift1024* generator. + */ +struct Xorshift128plus { - static uint64_t s[2]; - - uint64_t avalance(uint64_t s) +public: + Xorshift128plus() { - // MurmurHash3 finalizer - s ^= s >> 33; - s *= 0xff51afd7ed558ccd; - s ^= s >> 33; - s *= 0xc4ceb9fe1a85ec53; - s ^= s >> 33; - - return s; - } - - void init() - { - // TODO - // not sure if this thread local means anything for other threads - // fix this!!!! - // use a slow, more complex rnd generator to initialize a fast one // make sure to call this before requesting any random numbers! std::random_device rd; @@ -40,16 +30,31 @@ namespace xorshift s[1] = avalance(dist(gen)); } - uint64_t next() + uint64_t operator()() { uint64_t s1 = s[0]; const uint64_t s0 = s[1]; s[0] = s0; - s1 ^= s1 << 23; + s1 ^= s1 << 23; - return (s[1] = (s1 ^ s0 ^ (s1 >> 17) ^ (s0 >> 26))) + s0; + return (s[1] = (s1 ^ s0 ^ (s1 >> 17) ^ (s0 >> 26))) + s0; } -} + +private: + uint64_t s[2]; + + uint64_t avalance(uint64_t s) + { + // MurmurHash3 finalizer + s ^= s >> 33; + s *= 0xff51afd7ed558ccd; + s ^= s >> 33; + s *= 0xc4ceb9fe1a85ec53; + s ^= s >> 33; + + return s; + } +}; #endif