In [1]:
import collections
import subprocess
import itertools
import os
import time
import madoka
import numpy as np
import redis
# Alphabet the benchmark keys are built from: lowercase, uppercase, digits.
ALPHANUM = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
# Number of distinct 3-character keys over ALPHANUM (62 ** 3 == 238328).
# Derived from the alphabet so the two can never drift apart.
NUM_ALPHANUM_COMBINATION = len(ALPHANUM) ** 3
# Fixed seed so every "Restart & Run All" benchmarks the same values and the
# reported error rate is reproducible.
ZIPF_SEED = 0
# One Zipf-distributed (exponent 1.5) value per key; zipf_array[i] is the value
# stored under the i-th key produced by itertools.product(ALPHANUM, repeat=3).
# A private RandomState is used so the global NumPy RNG state is left untouched.
zipf_array = np.random.RandomState(ZIPF_SEED).zipf(1.5, NUM_ALPHANUM_COMBINATION)
In [2]:
def python_memory_usage():
    """Return the memory figure that ``ps`` reports for this Python process.

    Runs ``ps up <pid>`` for the current interpreter, splits the *whole*
    output (header row and data row together) on whitespace and converts
    token 15 to an ``int``.

    NOTE(review): with the usual 11-column ``ps u`` header, token 15 is the
    VSZ column of the data row -- confirm on the target platform.
    """
    command = 'ps up {}'.format(os.getpid())
    tokens = subprocess.getoutput(command).split()
    return int(tokens[15])
def redis_memory_usage():
    """Return the memory figure that ``ps`` reports for the redis-server process.

    The PID is taken from the first line of plain ``ps`` output that contains
    ``'redis-server'``; ``ps up <pid>`` is then parsed the same way as for the
    Python process (token 15 of the whitespace-split output).

    Raises:
        RuntimeError: if no ``redis-server`` line is present in the ``ps``
            output (previously this surfaced as an ``UnboundLocalError``).

    NOTE(review): plain ``ps`` typically lists only processes attached to the
    current terminal, so a redis-server started elsewhere (e.g. as a service)
    may not be found -- confirm this matches how the server is launched.
    """
    pid = None
    for line in subprocess.getoutput('ps').splitlines():
        if 'redis-server' in line:
            # First column of the default `ps` listing is the PID.
            pid = line.split()[0]
            break
    if pid is None:
        raise RuntimeError(
            "no 'redis-server' process found in `ps` output; "
            "start redis-server before running the Redis benchmark"
        )
    return int(subprocess.getoutput('ps up %s' % pid).split()[15])
def count(counter):
    """Fill ``counter`` with one value per 3-character key and return it.

    Keys are every 3-character string over ``ALPHANUM`` in
    ``itertools.product`` order; the i-th key is assigned
    ``int(zipf_array[i])`` via item assignment, so any mapping-like object
    that supports ``obj[key] = value`` can be passed in.
    """
    for index, triple in enumerate(itertools.product(ALPHANUM, repeat=3)):
        counter[''.join(triple)] = int(zipf_array[index])
    return counter
def benchmark(counter, start_mem_usage):
    """Populate ``counter`` and print how much this process's memory grew.

    Args:
        counter: container to fill; handed straight to ``count``.
        start_mem_usage: ``python_memory_usage()`` reading taken by the
            caller before the container was created.

    Returns:
        The populated container returned by ``count``.
    """
    filled = count(counter)
    # Growth is measured against the caller-supplied baseline.
    grown_by = python_memory_usage() - start_mem_usage
    print('memory consumption is {:,d} KB'.format(grown_by))
    return filled
def redis_benchmark():
    """Store the benchmark key/value pairs in Redis and print its memory growth.

    Connects with the default ``redis.Redis()`` settings, wipes the server
    with ``flushall()`` (this deletes ALL existing keys), then queues one SET
    per 3-character key on a pipeline and sends them in a single batch.
    The redis-server memory reading is taken before and after, and the
    difference is printed.
    """
    client = redis.Redis()
    client.flushall()
    mem_before = redis_memory_usage()
    with client.pipeline() as batch:
        for index, triple in enumerate(itertools.product(ALPHANUM, repeat=3)):
            batch.set(''.join(triple), int(zipf_array[index]))
        # Flush every queued SET to the server at once.
        batch.execute()
    mem_after = redis_memory_usage()
    print('memory consumption is {:,d} KB'.format(mem_after - mem_before))
In [3]:
# Benchmark driver: fill each store with the same key/value pairs and report
# its memory growth (printed by benchmark()/redis_benchmark()) and CPU time.
#
# NOTE: time.process_time() counts only CPU time used by this Python process,
# so for Redis the figure excludes work done inside redis-server and time
# spent waiting on the socket.
TIME_REPORT = 'Processing Time is %5f sec.'

# The two in-process containers share exactly the same measurement procedure.
for label, factory in (('collections.Counter', collections.Counter),
                       ('madoka.Sketch', madoka.Sketch)):
    print(label)
    # Baseline is read before the container exists and before the clock starts.
    start_mem_usage = python_memory_usage()
    start_time = time.process_time()
    populated = benchmark(factory(), start_mem_usage)
    end_time = time.process_time()
    print(TIME_REPORT % (end_time - start_time))
    # Drop the filled container (after timing) before the next measurement.
    del populated
    print('*' * 30)

print('Redis')
start_time = time.process_time()
redis_benchmark()
end_time = time.process_time()
print(TIME_REPORT % (end_time - start_time))
In [4]:
# Accuracy check: store every value in a fresh sketch, read it straight back,
# and record the relative error (in percent) for each key in `diffs`.
sketch = madoka.Sketch()
diffs = []
for index, triple in enumerate(itertools.product(ALPHANUM, repeat=3)):
    key = ''.join(triple)
    expected = int(zipf_array[index])
    sketch[key] = expected
    error = abs(sketch[key] - expected)
    # Exact read-backs contribute a plain 0; otherwise the error as a percentage
    # of the true value.
    diffs.append(error / expected * 100 if error > 0 else 0)
In [5]:
# Mean of the per-key relative errors (percent) collected in `diffs`.
mean_error_pct = np.average(diffs)
print(mean_error_pct)
[Low] madoka.Sketch < Redis < collections.Counter [High]
Average relative error of madoka.Sketch: about 0.0911 %
In [ ]: