mirror of
https://github.com/tursom/GoCollections.git
synced 2025-03-13 17:00:18 +08:00
118 lines
2.0 KiB
Go
118 lines
2.0 KiB
Go
package bloom
|
|
|
|
import (
|
|
"encoding/binary"
|
|
"io"
|
|
"math"
|
|
"unsafe"
|
|
|
|
"github.com/spaolacci/murmur3"
|
|
|
|
"github.com/tursom/GoCollections/exceptions"
|
|
"github.com/tursom/GoCollections/lang"
|
|
)
|
|
|
|
var (
|
|
HashFunc = murmur3.Sum32WithSeed
|
|
)
|
|
|
|
type (
|
|
Bloom struct {
|
|
m lang.UInt8Array
|
|
k uint
|
|
c uint
|
|
}
|
|
)
|
|
|
|
func max(i1, i2 uint) uint {
|
|
if i1 < i2 {
|
|
return i2
|
|
} else {
|
|
return i1
|
|
}
|
|
}
|
|
|
|
func numHashFunctions(n, m float64) uint {
|
|
return max(1, uint(math.Floor(0.5+m/n*math.Ln2)))
|
|
}
|
|
|
|
func NumHashFunctions(n, m uint) uint {
|
|
return numHashFunctions(float64(n), float64(m))
|
|
}
|
|
|
|
func calcBitLength(n float64, p float64) uint {
|
|
if p == 0 {
|
|
p = math.SmallestNonzeroFloat64
|
|
}
|
|
return uint(-n * math.Log(p) / (math.Ln2 * math.Ln2))
|
|
}
|
|
|
|
func CalcBitLength(n uint, p float64) uint {
|
|
return calcBitLength(float64(n), p)
|
|
}
|
|
|
|
func NewBloom(n uint, p float64) *Bloom {
|
|
m := CalcBitLength(n, p) - 1
|
|
return &Bloom{
|
|
m: make(lang.UInt8Array, m/8+1),
|
|
k: NumHashFunctions(n, m),
|
|
}
|
|
}
|
|
|
|
func (b *Bloom) C() uint {
|
|
return b.c
|
|
}
|
|
|
|
func (b *Bloom) K() uint {
|
|
return b.k
|
|
}
|
|
|
|
func (b *Bloom) M() uint {
|
|
return uint(len(b.m)) * 8
|
|
}
|
|
|
|
func (b *Bloom) Contains(data []byte) bool {
|
|
for i := 0; i < int(b.k); i++ {
|
|
hashCode := uint(HashFunc(data, uint32(i)))
|
|
if !b.m.GetBit(hashCode % b.m.BitLength()) {
|
|
return false
|
|
}
|
|
}
|
|
|
|
return true
|
|
}
|
|
|
|
func (b *Bloom) Add(data []byte) {
|
|
b.c++
|
|
for i := 0; i < int(b.k); i++ {
|
|
hashCode := uint(HashFunc(data, uint32(i)))
|
|
b.m.SetBit(hashCode%b.m.BitLength(), true)
|
|
}
|
|
}
|
|
|
|
func (b *Bloom) Marshal(writer io.Writer) {
|
|
if err := binary.Write(writer, binary.BigEndian, uint32(b.k)); err != nil {
|
|
panic(exceptions.Package(err))
|
|
}
|
|
if err := binary.Write(writer, binary.BigEndian, uint32(b.c)); err != nil {
|
|
panic(exceptions.Package(err))
|
|
}
|
|
|
|
if _, err := writer.Write(b.m.Bytes()); err != nil {
|
|
panic(exceptions.Package(err))
|
|
}
|
|
}
|
|
|
|
func Unmarshal(data []byte) *Bloom {
|
|
k := binary.BigEndian.Uint32(data)
|
|
c := binary.BigEndian.Uint32(data[4:])
|
|
|
|
m := data[8:]
|
|
|
|
return &Bloom{
|
|
m: *(*lang.UInt8Array)(unsafe.Pointer(&m)),
|
|
k: uint(k),
|
|
c: uint(c),
|
|
}
|
|
}
|