In [1]:
import numpy as np
%reload_ext cythonmagic
%reload_ext memory_profiler
In [2]:
pos1 = np.random.randint(0, 10000000, size=10000000)
pos1 = np.unique(pos1)
pos1.shape
Out[2]:
In [3]:
pos2 = np.random.randint(0, 10000000, size=10000000)
pos2 = np.unique(pos2)
pos2.shape
Out[3]:
In [4]:
pos1
Out[4]:
In [5]:
pos2
Out[5]:
In [6]:
def intersect_positions_impl1(pos1, pos2):
cond1 = np.in1d(pos1, pos2)
cond2 = np.in1d(pos2, pos1)
return cond1, cond2
In [7]:
cond1, cond2 = intersect_positions_impl1(pos1, pos2)
In [8]:
pos1[cond1]
Out[8]:
In [9]:
pos2[cond2]
Out[9]:
In [10]:
np.count_nonzero(cond1)
Out[10]:
In [11]:
%timeit intersect_positions_impl1(pos1, pos2)
In [12]:
%memit intersect_positions_impl1(pos1, pos2)
In [13]:
%%cython
import numpy as np
cimport numpy as np
def intersect_positions_impl2(np.ndarray[np.int64_t, ndim=1] pos1,
np.ndarray[np.int64_t, ndim=1] pos2):
cdef int i = 0
cdef int j = 0
cdef np.ndarray[np.uint8_t, ndim=1] cond1
cdef np.ndarray[np.uint8_t, ndim=1] cond2
cond1 = np.zeros((pos1.size, ), dtype=np.uint8)
cond2 = np.zeros((pos2.size, ), dtype=np.uint8)
cdef int m = pos1.size
cdef int n = pos2.size
while i < m and j < n:
if pos1[i] < pos2[j]:
# advance left
i += 1
elif pos1[i] > pos2[j]:
# advance right
j += 1
else:
# match
cond1[i] = 1
cond2[j] = 1
# advance both
i += 1
j += 1
return cond1.astype('b1'), cond2.astype('b1')
In [14]:
cond1, cond2 = intersect_positions_impl2(pos1, pos2)
In [15]:
pos1[cond1]
Out[15]:
In [16]:
pos2[cond2]
Out[16]:
In [17]:
np.count_nonzero(cond1)
Out[17]:
In [18]:
%timeit intersect_positions_impl2(pos1, pos2)
In [19]:
%memit intersect_positions_impl2(pos1, pos2)
In [20]:
pos1.nbytes / 1.e6
Out[20]:
In [21]:
# dev imports
import sys
import numpy as np
%reload_ext memory_profiler
sys.path.insert(0, '../src')
%reload_ext autoreload
%autoreload 1
%aimport anhima.gt
%aimport anhima.af
%aimport anhima.util
In [22]:
genotypes = np.random.randint(0, 2, size=(100000, 1000, 2)).astype('i1')
genotypes.shape, genotypes.nbytes / 1e6
Out[22]:
In [23]:
gn = anhima.gt.as_n_alt(genotypes)
In [24]:
gn.shape, gn.nbytes / 1e6
Out[24]:
In [25]:
gn2 = anhima.util.block_apply(anhima.gt.as_n_alt, genotypes, axis=0, block_size=1001)
In [26]:
gn2.shape, gn2.nbytes / 1e6
Out[26]:
In [27]:
np.array_equal(gn, gn2)
Out[27]:
In [28]:
ac = anhima.af.allele_counts(genotypes)
In [29]:
ac.shape
Out[29]:
In [30]:
ac2 = anhima.util.block_apply(anhima.af.allele_counts, genotypes, axis=0, block_size=1001)
In [31]:
ac2.shape
Out[31]:
In [32]:
np.array_equal(ac, ac2)
Out[32]:
In [33]:
%timeit anhima.gt.as_n_alt(genotypes)
In [34]:
%timeit anhima.util.block_apply(anhima.gt.as_n_alt, genotypes, axis=0, block_size=1001)
In [35]:
%timeit anhima.af.allele_counts(genotypes)
In [36]:
%timeit anhima.util.block_apply(anhima.af.allele_counts, genotypes, axis=0, block_size=1001)
In [37]:
%memit anhima.gt.as_n_alt(genotypes)
In [38]:
%memit anhima.util.block_apply(anhima.gt.as_n_alt, genotypes, axis=0, block_size=1001)
In [39]:
packed = anhima.gt.pack_diploid(genotypes)
In [40]:
packed2 = anhima.util.block_apply(anhima.gt.pack_diploid, genotypes, axis=0, block_size=1001)
In [41]:
np.array_equal(packed, packed2)
Out[41]:
In [42]:
%memit anhima.gt.pack_diploid(genotypes)
In [43]:
%memit anhima.util.block_apply(anhima.gt.pack_diploid, genotypes, axis=0, block_size=1001)
In [44]:
packed.nbytes / 1e6
Out[44]:
In [45]:
unpacked = anhima.gt.unpack_diploid(packed)
In [46]:
unpacked2 = anhima.util.block_apply(anhima.gt.unpack_diploid, packed2, axis=0, block_size=1001)
In [47]:
np.array_equal(unpacked, unpacked2)
Out[47]:
In [48]:
%memit anhima.gt.unpack_diploid(packed)
In [49]:
%memit anhima.util.block_apply(anhima.gt.unpack_diploid, packed2, axis=0, block_size=1001)
In [ ]: