In [1]:
import itertools
import numpy as np
import sys
import scipy.stats
import matplotlib.pyplot as plt
%matplotlib inline
sys.path.insert(0, '../..')
%load_ext memory_profiler
%load_ext autoreload
%autoreload 1
%aimport allel.model
%aimport allel.stats
In [2]:
pos = np.unique(np.random.randint(0, 100000000, size=10000000))
pos.size, pos.dtype
Out[2]:
In [3]:
values = np.random.randint(0, 60, size=pos.size)
values
Out[3]:
In [4]:
%timeit allel.stats.moving_statistic(values, statistic=np.sum, size=10000)
In [5]:
%timeit scipy.stats.binned_statistic(np.arange(values.size), values, statistic='sum', bins=np.arange(0, values.size, 10000))
In [6]:
%memit allel.stats.moving_statistic(values, statistic=np.sum, size=10000)
In [7]:
%memit scipy.stats.binned_statistic(np.arange(values.size), values, statistic='sum', bins=np.arange(0, values.size, 10000))
In [8]:
y1 = allel.stats.moving_statistic(values, statistic=np.sum, size=10000)
In [9]:
y2, _, _ = scipy.stats.binned_statistic(np.arange(values.size), values, statistic='sum', bins=np.arange(0, values.size+10000, 10000))
In [10]:
np.array_equal(y1, y2)
Out[10]:
In [11]:
x = allel.stats.moving_statistic(pos, statistic=lambda b: (b[0] + b[-1])/2, size=10000)
In [12]:
plt.plot(x, y1);
In [4]:
h1, w, c = allel.stats.windowed_statistic(pos, values, np.sum, 10000, start=0, stop=10000000, step=10000)
In [5]:
h2, b, _ = scipy.stats.binned_statistic(pos, values, statistic='sum', bins=np.arange(0, 10000001, 10000))
In [15]:
h1.shape
Out[15]:
In [16]:
h2.shape
Out[16]:
In [17]:
np.array_equal(h1, h2)
Out[17]:
In [18]:
b
Out[18]:
In [19]:
w
Out[19]:
In [6]:
%timeit allel.stats.windowed_statistic(pos, values, np.sum, 100000, start=0, stop=100000000)
In [7]:
%timeit scipy.stats.binned_statistic(pos, values, statistic='sum', bins=np.arange(0, 100000000, 100000))
In [8]:
%memit allel.stats.windowed_statistic(pos, values, np.sum, 100000, start=0, stop=100000000)
In [9]:
%memit scipy.stats.binned_statistic(pos, values, statistic='sum', bins=np.arange(0, 100000000, 100000))
In [10]:
is_accessible = np.random.randint(0, 2, size=pos.max()).astype('b1')
is_accessible
Out[10]:
In [11]:
h1, w, c = allel.stats.windowed_statistic(pos, values, np.sum, 10000, start=0, stop=10000000, step=10000)
In [16]:
%timeit allel.stats.per_base(h1, w, is_accessible=is_accessible)
In [ ]: