• Trying to increase the speed and efficiency of SIPSim

In [15]:
import os,sys
import numpy as np
import pandas as pd
import scipy.stats as stats
from collections import defaultdict


/opt/anaconda/lib/python2.7/site-packages/pytz/__init__.py:29: UserWarning: Module argparse was already imported from /opt/anaconda/lib/python2.7/argparse.pyc, but /opt/anaconda/lib/python2.7/site-packages is being added to sys.path
  from pkg_resources import resource_stream

Distributions: drawing a normal sample with NumPy versus SciPy


In [8]:
%%timeit

# Baseline: draw a single normal sample (default loc/scale) through NumPy's
# module-level random interface.
np.random.normal(size=1)


1000000 loops, best of 3: 1.62 µs per loop

In [9]:
%%timeit

# Same kind of draw through SciPy. NOTE: `stats.norm()` is called inside the
# timed body, so every iteration builds a new distribution object before
# sampling; the timing covers construction plus the draw.
stats.norm().rvs()


1000 loops, best of 3: 680 µs per loop

dict of pd.DataFrames versus 3d numpy array


In [26]:
# Two containers holding the same data: a pair of identical 10-row tables with
# integer columns 'a' and 'b', each column counting 0..9.
# `ddf` keeps them as two separate DataFrames keyed by name.
ddf = {key: pd.DataFrame({'a': range(10), 'b': range(10)})
       for key in ('a', 'b')}

# `narr` packs them into one array of shape (2 tables, 10 rows, 2 columns).
# np.arange replaces `range(10) + range(10)`, which only works where range()
# returns a list (Python 2) and raises TypeError otherwise.
narr = np.tile(np.arange(10).reshape(10, 1), (2, 1, 2))

In [27]:
ddf


Out[27]:
{'a':    a  b
 0  0  0
 1  1  1
 2  2  2
 3  3  3
 4  4  4
 5  5  5
 6  6  6
 7  7  7
 8  8  8
 9  9  9, 'b':    a  b
 0  0  0
 1  1  1
 2  2  2
 3  3  3
 4  4  4
 5  5  5
 6  6  6
 7  7  7
 8  8  8
 9  9  9}

In [28]:
narr


Out[28]:
array([[[0, 0],
        [1, 1],
        [2, 2],
        [3, 3],
        [4, 4],
        [5, 5],
        [6, 6],
        [7, 7],
        [8, 8],
        [9, 9]],

       [[0, 0],
        [1, 1],
        [2, 2],
        [3, 3],
        [4, 4],
        [5, 5],
        [6, 6],
        [7, 7],
        [8, 8],
        [9, 9]]])

In [29]:
# Structured array with two records; field 'x' is a float and field 'y' is an
# int. Used as the NumPy side of the element-update timing.
x = np.array(
    [(1.0, 2), (3.0, 4)],
    dtype=np.dtype([('x', float), ('y', int)]),
)

In [42]:
# Two-row DataFrame used as the pandas side of the element-update timing.
# Each tuple is one column: column 'x' holds (1.0, 2), column 'y' holds (3.0, 4).
y = pd.DataFrame(dict(x=(1.0, 2), y=(3.0, 4)))

In [45]:
%%timeit

# In-place increment of field 'x' of the first record in the structured array `x`.
x['x'][0] += 1


1000000 loops, best of 3: 519 ns per loop

In [50]:
%%timeit

# Same increment on the DataFrame `y`.
# NOTE(review): `y['x'][0] += 1` is chained indexing (column lookup, then an
# element assignment on the result); pandas recommends `.loc`/`.at` for element
# writes -- confirm which access pattern the real code should be compared with.
y['x'][0] += 1


10000 loops, best of 3: 74.9 µs per loop

In [53]:
# Two zero-filled arrays with the same number of elements (1000):
# `a` is a 10x10x10 cube, `b` is the flat one-dimensional equivalent.
a = np.zeros(shape=(10, 10, 10))
b = np.zeros(shape=(10 ** 3,))

In [60]:
%%timeit 

# Increment one element of the 3-D array `a` using a three-part index.
a[0,0,0] += 1


1000000 loops, best of 3: 328 ns per loop

In [62]:
%%timeit

# Increment one element of the flat 1-D array `b` using a single integer index.
b[100] += 1


1000000 loops, best of 3: 291 ns per loop

In [86]:
# `a` is two 10x10 zero matrices stacked along a new leading axis, giving
# shape (2, 10, 10); `b` stores two independent 10x10 zero matrices under the
# string keys 'a' and 'b'.
a = np.zeros((2, 10, 10))
b = dict((key, np.zeros((10, 10))) for key in ('a', 'b'))

In [95]:
%%timeit

# Increment one element of the stacked (2, 10, 10) array `a`.
a[0,0,0] += 1


1000000 loops, best of 3: 350 ns per loop

In [96]:
%%timeit

# Same element update, but with a dict lookup by key before indexing the
# 2-D array stored under that key.
b['a'][0,0] += 1


1000000 loops, best of 3: 365 ns per loop

In [110]:
# 10x10 matrix of uniform random values, plus the same values copied into a
# nested dict keyed by the row and column indices converted to strings.
# The loop variables are named `row`/`col` so that they do not overwrite the
# notebook-level `x` (structured array) and `y` (DataFrame) used by the
# element-update timing cells.
a = np.random.uniform(size=(10, 10))
b = defaultdict(dict)
for (row, col), value in np.ndenumerate(a):
    b[str(row)][str(col)] = value

In [113]:
%%timeit
# Increment one cell of the 2-D NumPy array `a`.
a[0,0] += 1


1000000 loops, best of 3: 312 ns per loop

In [114]:
%%timeit
# Increment one entry of the nested dict `b` (string keys at both levels).
b['0']['0'] += 1


1000000 loops, best of 3: 249 ns per loop

Basic testing: list comprehension versus explicit append loop


In [115]:
%%timeit

# Build the squares of 0..99 with a list comprehension.
[x**2 for x in range(100)]


100000 loops, best of 3: 7.91 µs per loop

In [118]:
%%timeit

# Build the same list of squares with an explicit loop and list.append.
l = []
for x in range(100):
    l.append(x**2)


10000 loops, best of 3: 17 µs per loop

Speeding up drawing from a numpy distribution


In [ ]: