In [1]:
import os
import pickle as pkl
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np
from joblib import Parallel, delayed

import readwrite as rw
import sampler

%load_ext autoreload
%autoreload 2

%matplotlib inline

In [3]:
# Start from an empty list; the Parallel call later in this cell rebinds the name.
num_subsampled = list()
def count_subsampled(i, simpath='sim1000', pkl_template='20150910_PB2s{0}.pkl'):
    """Return the number of subsampled entries stored for simulation ``i``.

    The saved simulation is read with ``rw.load`` from
    ``os.path.join(simpath, pkl_template.format(i))`` and the length of its
    ``subsampled`` attribute is returned.

    Parameters
    ----------
    i
        Simulation index substituted into ``pkl_template``.
    simpath : str, optional
        Directory that holds the saved simulations. Defaults to ``'sim1000'``.
    pkl_template : str, optional
        File-name pattern containing a ``{0}`` placeholder for ``i``.
        Defaults to ``'20150910_PB2s{0}.pkl'``.

    Returns
    -------
    int
        ``len()`` of the loaded object's ``subsampled`` attribute.
    """
    # NOTE(review): rw.load presumably unpickles the file -- confirm, and only
    # point this at trusted simulation output.
    pkl_path = os.path.join(simpath, pkl_template.format(i))
    return len(rw.load(pkl_path).subsampled)


# Run count_subsampled for indices 0..999 through joblib's Parallel
# (n_jobs=-1) and keep the returned counts.
num_subsampled = Parallel(n_jobs=-1)(
    map(delayed(count_subsampled), range(1000))
)

In [4]:
# Histogram of the per-simulation subsampled counts.
# The labels are set before the hist call so that hist stays the cell's last
# expression and the displayed Out value (counts, bin edges, patches) is unchanged.
plt.xlabel('Number of subsampled entries per simulation')
plt.ylabel('Number of simulations')
plt.hist(num_subsampled)


Out[4]:
(array([   9.,   35.,   92.,  180.,  215.,  195.,  173.,   62.,   33.,    6.]),
 array([  950.,   955.,   960.,   965.,   970.,   975.,   980.,   985.,
          990.,   995.,  1000.]),
 <a list of 10 Patch objects>)

In [5]:
# Smallest per-simulation subsampled count.
min(num_subsampled)


Out[5]:
950

In [6]:
# Largest per-simulation subsampled count.
max(num_subsampled)


Out[6]:
1000

In [7]:
# Median of the per-simulation subsampled counts.
np.median(num_subsampled)


Out[7]:
974.0

In [ ]: