• Goal: find ways to increase the speed and efficiency of SIPSim

In [15]:
import os,sys
import numpy as np
import pandas as pd
import scipy.stats as stats
from collections import defaultdict


/opt/anaconda/lib/python2.7/site-packages/pytz/__init__.py:29: UserWarning: Module argparse was already imported from /opt/anaconda/lib/python2.7/argparse.pyc, but /opt/anaconda/lib/python2.7/site-packages is being added to sys.path
  from pkg_resources import resource_stream

Distributions


In [8]:
%%timeit

# Baseline: a single draw from a normal distribution (default loc/scale)
# through NumPy's module-level random API; size=1 returns a 1-element array.
np.random.normal(size=1)


1000000 loops, best of 3: 1.62 µs per loop

In [9]:
%%timeit

# The same kind of draw through SciPy. NOTE: stats.norm() is called inside the
# timed statement, so every loop also pays for constructing a new frozen
# distribution object, not only for sampling from it.
stats.norm().rvs()


1000 loops, best of 3: 680 µs per loop

Dict of pd.DataFrames versus 3D NumPy array


In [26]:
# Dict-of-DataFrames layout: two separate 10-row tables, keyed 'a' and 'b',
# each with columns 'a' and 'b' that both count 0..9.
ddf = {key: pd.DataFrame({'a':range(10), 'b':range(10)})
       for key in ('a', 'b')}

# 3D-array counterpart of the dict of DataFrames: axis 0 selects the table,
# axis 1 the row and axis 2 the column, giving shape (2, 10, 2) with both
# columns of every table counting 0..9.
# Each table is built with np.column_stack/np.arange rather than
# `range(10) + range(10)`, which only works where range() returns a list
# (Python 2) and raises TypeError on Python 3.
narr = np.array([np.column_stack([np.arange(10), np.arange(10)]),
                 np.column_stack([np.arange(10), np.arange(10)])
                 ])

In [27]:
# Display the dict of DataFrames to inspect its layout.
ddf


Out[27]:
{'a':    a  b
 0  0  0
 1  1  1
 2  2  2
 3  3  3
 4  4  4
 5  5  5
 6  6  6
 7  7  7
 8  8  8
 9  9  9, 'b':    a  b
 0  0  0
 1  1  1
 2  2  2
 3  3  3
 4  4  4
 5  5  5
 6  6  6
 7  7  7
 8  8  8
 9  9  9}

In [28]:
# Display the 3D array to compare its layout with the dict of DataFrames.
narr


Out[28]:
array([[[0, 0],
        [1, 1],
        [2, 2],
        [3, 3],
        [4, 4],
        [5, 5],
        [6, 6],
        [7, 7],
        [8, 8],
        [9, 9]],

       [[0, 0],
        [1, 1],
        [2, 2],
        [3, 3],
        [4, 4],
        [5, 5],
        [6, 6],
        [7, 7],
        [8, 8],
        [9, 9]]])

In [ ]: