// Source listing: GoCollections/util/bloom/Bloom.go (88 lines, 1.4 KiB, Go).
// Revision timestamp: 2023-04-16 22:37:25 +08:00.

package bloom
import (
"math"
"github.com/spaolacci/murmur3"
"github.com/tursom/GoCollections/lang"
)
// HashFunc is the seeded 32-bit hash used to derive bit positions.
// Contains and Add call it once per hash round, passing the round index
// as the seed. It defaults to murmur3.Sum32WithSeed.
var HashFunc = murmur3.Sum32WithSeed
// Bloom is a Bloom filter backed by a packed bit array.
type Bloom struct {
	// m is the bit array; one bit is addressed per hash round.
	m lang.UInt8Array
	// k is the number of hash rounds performed per element.
	k uint
	// c counts calls to Add.
	c uint
}
// max returns the larger of i1 and i2.
func max(i1, i2 uint) uint {
	if i1 >= i2 {
		return i1
	}
	return i2
}
// numHashFunctions returns the number of hash rounds for a filter holding
// n elements in m bits, computed as round(m/n * ln 2) and never less
// than 1.
//
// Non-positive or NaN inputs yield 1. Without this guard n == 0 would
// produce +Inf or NaN, and converting such a value to uint is
// implementation-dependent in Go.
func numHashFunctions(n, m float64) uint {
	// The negated comparisons also catch NaN, which fails every ordering test.
	if !(n > 0) || !(m > 0) {
		return 1
	}
	k := math.Floor(0.5 + m/n*math.Ln2)
	if !(k >= 1) {
		return 1
	}
	return uint(k)
}
// NumHashFunctions returns the number of hash rounds for a filter holding
// n elements in m bits. Both arguments are converted to float64 and
// forwarded to numHashFunctions.
func NumHashFunctions(n, m uint) uint {
	elements, bits := float64(n), float64(m)
	return numHashFunctions(elements, bits)
}
// calcBitLength returns the number of bits needed to hold n elements at
// false-positive probability p, computed as -n*ln(p)/(ln 2)^2 and
// truncated toward zero.
//
// p <= 0 is replaced by the smallest positive float64 so the logarithm
// stays finite. The result is 0 when the formula is not positive (for
// example p >= 1 or n == 0) or is NaN, and it saturates at the largest
// uint. Converting a negative, NaN or out-of-range float to uint is
// implementation-dependent in Go, so those cases are handled explicitly.
func calcBitLength(n float64, p float64) uint {
	const maxUint = ^uint(0)

	if p <= 0 {
		p = math.SmallestNonzeroFloat64
	}
	bits := -n * math.Log(p) / (math.Ln2 * math.Ln2)
	// The negated comparison also catches NaN.
	if !(bits > 0) {
		return 0
	}
	if bits >= float64(maxUint) {
		return maxUint
	}
	return uint(bits)
}
// CalcBitLength returns the number of bits needed to hold n elements at
// false-positive probability p. n is converted to float64 and the work is
// delegated to calcBitLength.
func CalcBitLength(n uint, p float64) uint {
	elements := float64(n)
	return calcBitLength(elements, p)
}
// NewBloom creates an empty Bloom filter sized for n expected elements at
// a target false-positive probability p.
//
// n == 0 is treated as 1, and a computed bit length of 0 (for example
// when p == 1) is raised to 1. Without these guards the subtraction below
// would wrap around to the largest uint and request an impossibly large
// allocation, and the hash-count formula would divide by zero.
func NewBloom(n uint, p float64) *Bloom {
	if n == 0 {
		n = 1
	}
	bits := CalcBitLength(n, p)
	if bits == 0 {
		bits = 1
	}
	// With m = bits-1, m/8+1 equals ceil(bits/8): the number of bytes
	// needed to hold bits bits.
	m := bits - 1
	return &Bloom{
		m: make(lang.UInt8Array, m/8+1),
		k: NumHashFunctions(n, m),
	}
}
// C returns the number of times Add has been called on the filter.
// Repeated additions of the same data are each counted.
func (b *Bloom) C() uint {
	return b.c
}
// K returns the number of hash rounds applied to each element.
func (b *Bloom) K() uint {
	return b.k
}
// M returns the capacity of the filter in bits: eight times the length of
// the backing byte array.
func (b *Bloom) M() uint {
	return 8 * uint(len(b.m))
}
func (b *Bloom) Contains(data []byte) bool {
for i := 0; i < int(b.k); i++ {
hashCode := uint(HashFunc(data, uint32(i)))
2023-04-18 18:15:04 +08:00
if !b.m.GetBit(hashCode % b.m.BitLength()) {
2023-04-16 22:37:25 +08:00
return false
}
}
return true
}
// Add records data in the filter and increments the addition counter.
//
// It hashes data once per round, using the round index as the seed, and
// sets the bit at the hash value modulo the bit length.
func (b *Bloom) Add(data []byte) {
	b.c++
	rounds := int(b.k)
	for seed := 0; seed < rounds; seed++ {
		pos := uint(HashFunc(data, uint32(seed))) % b.m.BitLength()
		b.m.SetBit(pos, true)
	}
}