In [13]:
from random import *
N = 16
T = 100*1000
PRIMES = [121021, 121151, 150151, 151051, 151121, 180181, 180811, 181081, 2976221,
3021377, 6972593, 13466917, 20996011, 24036583, 25964951, 30402457]
def generate_hash(max_n, prime):
p = prime
n = max_n
def hash_fn(val):
return (val*p) % n
return hash_fn
class CountMinSketch:
def __init__(self):
self.tables = [[0]*T for _ in range(0, N)]
self.hashes = [generate_hash(T, PRIMES[i]) for i in range(0, N)]
def add(self, val, count):
hashes = self.hashes
tables = self.tables
for ix in range(0, N):
h = hashes[ix](val)
tables[ix][h] += count
def estimate(self, value):
results = []
tables = self.tables
hashes = self.hashes
for ix in range(0, N):
h = hashes[ix](value)
c = tables[ix][h]
results.append(c)
return min(results)
In [10]:
seq1 = range(10000, 100000)
sketch1 = CountMinSketch()
for ix, x in enumerate(seq1):
sketch1.add(x, ix + 1)
In [11]:
errors = []
for ix, x in enumerate(seq1):
est = sketch1.estimate(x)
err = float(est - ix - 1)/(ix+1)
errors.append(err)
In [12]:
sum(errors)
Out[12]:
In [ ]: