commit eda793fad7cdcef08e761402bf9121b74d0bbf96
Author: Chris Johns <chris@ter0.net>
Date: Fri, 25 Jun 2021 23:48:55 +0100
initial commit
Diffstat:
A | go.mod | | | 3 | +++ |
A | heck.go | | | 69 | +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ |
A | heck_test.go | | | 17 | +++++++++++++++++ |
3 files changed, 89 insertions(+), 0 deletions(-)
diff --git a/go.mod b/go.mod
@@ -0,0 +1,3 @@
+module git.ter0.net/heck
+
+go 1.16
diff --git a/heck.go b/heck.go
@@ -0,0 +1,69 @@
+package heck
+
+import (
+ "hash/maphash"
+ "math"
+ "math/big"
+)
+
+type BloomFilter struct {
+ b *big.Int
+ seeds []maphash.Seed
+ hash *maphash.Hash
+ m uint64
+ k int
+ ε float64
+}
+
+func k(m uint64, n uint64) int {
+ return int(math.Ceil(float64(m/n) * math.Log(2)))
+}
+
+func m(n uint64, ε float64) uint64 {
+ return uint64(math.Ceil(math.Abs(((float64(n) * math.Log(ε)) / math.Pow(math.Log(2), 2)))))
+}
+
+func NewBloomFilter(n uint64, ε float64) BloomFilter {
+ m := m(n, ε)
+ k := k(m, n)
+ f := BloomFilter{
+ b: new(big.Int),
+ m: m,
+ k: k,
+ ε: ε,
+ seeds: make([]maphash.Seed, k, k),
+ hash: new(maphash.Hash),
+ }
+
+ // it would be nicer to just have k initialised hashes we could reuse,
+ // but reusing them seems harder than the docs imply
+ for i := range f.seeds {
+ f.seeds[i] = maphash.MakeSeed()
+ }
+
+ return f
+}
+
+func (f *BloomFilter) index(s maphash.Seed, b []byte) int {
+ f.hash.SetSeed(s)
+ f.hash.Write(b)
+ defer f.hash.Reset()
+ return int(f.hash.Sum64() % f.m)
+}
+
+func (f *BloomFilter) Add(b []byte) {
+ for _, seed := range f.seeds {
+ index := f.index(seed, b)
+ f.b.SetBit(f.b, index, 1)
+ }
+}
+
+func (f *BloomFilter) IsMember(b []byte) bool {
+ for _, seed := range f.seeds {
+ index := f.index(seed, b)
+ if f.b.Bit(index) == 0 {
+ return false
+ }
+ }
+ return true
+}
diff --git a/heck_test.go b/heck_test.go
@@ -0,0 +1,17 @@
+package heck
+
+import (
+ "testing"
+)
+
+func TestMembership(t *testing.T) {
+ f := NewBloomFilter(1000, 0.01)
+ bytes := []byte{'f', 'o', 'o'}
+ if f.IsMember(bytes) {
+ t.Errorf("element unexpectedly in bloom filter")
+ }
+ f.Add(bytes)
+ if !f.IsMember(bytes) {
+ t.Errorf("element unexpectedly not in bloom filter")
+ }
+}