In [1]:
    
import collections
import subprocess
import itertools
import os
import time
import madoka
import numpy as np
import redis
# Alphabet used to build the benchmark keys (26 lowercase + 26 uppercase + 10 digits).
ALPHANUM = 'abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789'
# Number of distinct 3-character keys over ALPHANUM (62 ** 3 == 238328).
# Derived rather than hard-coded so it cannot drift from the alphabet.
NUM_ALPHANUM_COMBINATION = len(ALPHANUM) ** 3
# Fixed seed so that re-running the notebook reproduces the same counts.
SEED = 0
# One Zipf-distributed (a=1.5) positive integer per key; used as the value
# stored for that key in every benchmarked container.
zipf_array = np.random.RandomState(SEED).zipf(1.5, NUM_ALPHANUM_COMBINATION)
    
In [2]:
    
def python_memory_usage():
    """Return the memory figure that ``ps`` reports for this Python process.

    Runs ``ps up <pid>`` for the current process and converts the 16th
    whitespace-separated token of the output to ``int``.

    NOTE(review): with the usual 11-column ``ps u`` header that token is the
    VSZ column of the process row, presumably in KB (callers label it "KB")
    -- confirm on the target platform.
    """
    ps_command = 'ps up %s' % os.getpid()
    tokens = subprocess.getoutput(ps_command).split()
    return int(tokens[15])
def redis_memory_usage():
    """Return the memory figure that ``ps`` reports for the redis-server process.

    Scans the output of a plain ``ps`` for the first line containing
    ``'redis-server'``, takes that line's first token as the PID, then reads
    the 16th whitespace-separated token of ``ps up <pid>`` as an ``int``
    (the same parsing as ``python_memory_usage``).

    NOTE(review): a bare ``ps`` may only list processes of the current
    session, so a daemonised server might not be found -- confirm.

    Raises:
        RuntimeError: if no line of the ``ps`` output mentions
            ``redis-server`` (previously this surfaced as an
            ``UnboundLocalError`` on ``pid``).
    """
    pid = None
    for line in subprocess.getoutput('ps').splitlines():
        if 'redis-server' in line:
            pid = line.split()[0]
            break
    if pid is None:
        raise RuntimeError(
            "no 'redis-server' process found in `ps` output; "
            "is the Redis server running?"
        )
    return int(subprocess.getoutput('ps up %s' % pid).split()[15])
def count(counter):
    """Fill ``counter`` with one entry per 3-character key over ALPHANUM.

    Keys are generated in ``itertools.product`` order; the n-th key is
    assigned ``int(zipf_array[n])``.

    Args:
        counter: any object supporting ``counter[key] = value``.

    Returns:
        The same ``counter`` object, after being filled.
    """
    keys = map(''.join, itertools.product(ALPHANUM, repeat=3))
    for position, key in enumerate(keys):
        counter[key] = int(zipf_array[position])
    return counter
def benchmark(counter, start_mem_usage):
    """Fill ``counter`` via ``count`` and print how much memory usage grew.

    Args:
        counter: container passed to ``count``.
        start_mem_usage: ``python_memory_usage()`` reading taken by the
            caller before the container was created.

    Returns:
        The filled container returned by ``count``.
    """
    filled = count(counter)
    growth = python_memory_usage() - start_mem_usage
    print('memory consumption is {:,d} KB'.format(growth))
    return filled
def redis_benchmark():
    """Store every benchmark key in Redis and print the server's memory growth.

    Connects with ``redis.Redis()`` default settings, calls ``flushall()``
    (this clears the connected server -- do not point it at a server holding
    data you need), then writes one ``SET`` per 3-character key through a
    single pipeline. Memory is sampled with ``redis_memory_usage()`` before
    and after the writes.
    """
    client = redis.Redis()
    client.flushall()
    mem_before = redis_memory_usage()
    keys = map(''.join, itertools.product(ALPHANUM, repeat=3))
    with client.pipeline() as batch:
        # Queue all SETs, then send them to the server in one execute().
        for position, key in enumerate(keys):
            batch.set(key, int(zipf_array[position]))
        batch.execute()
    mem_after = redis_memory_usage()
    print('memory consumption is {:,d} KB'.format(mem_after - mem_before))
    
In [3]:
    
# Benchmark the two in-process containers with one shared procedure:
# sample memory, start the CPU timer, build and fill the container, report.
# The container is not bound to a name, so it is released before the next
# measurement starts.
for label, make_container in (('collections.Counter', collections.Counter),
                              ('madoka.Sketch', madoka.Sketch)):
    print(label)
    start_mem_usage = python_memory_usage()
    start_time = time.process_time()
    benchmark(make_container(), start_mem_usage)
    end_time = time.process_time()
    print('Processing Time is %5f sec.' % (end_time - start_time))
    print('*' * 30)

# NOTE: time.process_time() counts only this process's CPU time, so the
# Redis figure excludes work done inside the redis-server process and time
# spent waiting on the connection.
print('Redis')
start_time = time.process_time()
redis_benchmark()
end_time = time.process_time()
print('Processing Time is %5f sec.' % (end_time - start_time))
    
    
In [4]:
    
# Measure how far madoka.Sketch's stored value is from the value just
# written, as a percentage of the true value, for every benchmark key.
# Each key is read back immediately after it is set, before later keys are
# inserted.
sketch = madoka.Sketch()
diffs = []
for i, chars in enumerate(itertools.product(ALPHANUM, repeat=3)):
    chars = ''.join(chars)
    expected = int(zipf_array[i])
    sketch[chars] = expected
    diff = abs(sketch[chars] - expected)
    # Relative error in percent; exact matches are recorded as 0.
    diffs.append(diff / expected * 100 if diff > 0 else 0)
    
In [5]:
    
# Mean of the per-key relative errors (in percent) collected in `diffs`.
mean_error_percent = np.average(diffs)
print(mean_error_percent)
    
    
[Low] madoka.Sketch < Redis < collections.Counter [High]
Average relative error of madoka.Sketch: about 0.0911 %
In [ ]: