anhima.util
- Miscellaneous utilities
In [1]:
from __future__ import print_function, division
import sys
import numpy as np
import random
# import anhima
# dev imports
# Put the parent directory first on the module search path so that modules
# found there take precedence when importing.
sys.path.insert(0, '..')
# Load (or reload) IPython's autoreload extension.
%reload_ext autoreload
# Mode 1: only modules registered through %aimport are reloaded before code
# is executed.
%autoreload 1
# Import anhima.util and register it for autoreload.
# NOTE(review): later cells use anhima.gt and anhima.af through the bare
# `anhima` name while the plain import above is commented out -- confirm
# that this line makes those submodules reachable.
%aimport anhima.util
In [2]:
# Size of the synthetic data set; the array built below has shape
# (n_variants, n_samples, 2).
n_variants = 100000
n_samples = 1000
# Draw random 0/1 values directly as 1-byte integers. Passing dtype to
# randint avoids first building a default-integer array (typically 8 bytes
# per element, about 1.6 GB at this size) only to cast it down afterwards.
genotypes = np.random.randint(0, 2, size=(n_variants, n_samples, 2), dtype='i1')
# Show the array shape and its size in megabytes.
genotypes.shape, genotypes.nbytes / 1e6
Out[2]:
In [3]:
# Baseline: run anhima.gt.as_n_alt over the whole genotypes array in a
# single call; the blockwise result is compared against this.
gn = anhima.gt.as_n_alt(genotypes)
In [4]:
# Same function routed through anhima.util.block_apply with block_size=1000.
# NOTE(review): presumably processes `genotypes` in chunks of 1000 along the
# first axis and reassembles the result -- confirm in anhima.util.
gn2 = anhima.util.block_apply(anhima.gt.as_n_alt, genotypes, block_size=1000)
In [5]:
# The blockwise result must match the single-call result exactly.
# np.testing.assert_array_equal still raises AssertionError on a mismatch,
# but reports how the arrays differ; a bare assert on np.array_equal gives
# no detail. (It treats NaNs in matching positions as equal.)
np.testing.assert_array_equal(gn, gn2)
In [6]:
# Baseline: run anhima.af.allele_counts over the whole genotypes array in a
# single call; the blockwise result is compared against this.
ac = anhima.af.allele_counts(genotypes)
In [7]:
# Same function routed through anhima.util.block_apply with block_size=1000.
# NOTE(review): presumably processes `genotypes` in chunks of 1000 along the
# first axis and reassembles the result -- confirm in anhima.util.
ac2 = anhima.util.block_apply(anhima.af.allele_counts, genotypes, block_size=1000)
In [8]:
# The blockwise result must match the single-call result exactly.
# np.testing.assert_array_equal still raises AssertionError on a mismatch,
# but reports how the arrays differ; a bare assert on np.array_equal gives
# no detail. (It treats NaNs in matching positions as equal.)
np.testing.assert_array_equal(ac, ac2)
In [9]:
# Baseline: run anhima.gt.pack_diploid over the whole genotypes array in a
# single call; the blockwise result is compared against this.
packed = anhima.gt.pack_diploid(genotypes)
In [10]:
# Same function routed through anhima.util.block_apply with block_size=1000.
# NOTE(review): presumably processes `genotypes` in chunks of 1000 along the
# first axis and reassembles the result -- confirm in anhima.util.
packed2 = anhima.util.block_apply(anhima.gt.pack_diploid, genotypes, block_size=1000)
In [11]:
# The blockwise result must match the single-call result exactly.
# np.testing.assert_array_equal still raises AssertionError on a mismatch,
# but reports how the arrays differ; a bare assert on np.array_equal gives
# no detail. (It treats NaNs in matching positions as equal.)
np.testing.assert_array_equal(packed, packed2)
In [12]:
# Pick a random tenth of the positions along each of the first two axes
# (sampled without replacement), then put each selection in ascending order.
row_indices = random.sample(range(n_variants), n_variants // 10)
row_indices.sort()
col_indices = random.sample(range(n_samples), n_samples // 10)
col_indices.sort()
In [13]:
# Reference selection with plain NumPy: take the chosen rows first, then the
# chosen columns from that intermediate result.
g = np.take(
    np.take(genotypes, row_indices, axis=0),
    col_indices,
    axis=1,
)
In [14]:
# Same row/column selection through anhima.util.block_take2d with
# block_size=1000; its result is compared against the two-step NumPy take.
# NOTE(review): presumably works through `genotypes` in blocks of 1000 rows
# -- confirm in anhima.util.
g2 = anhima.util.block_take2d(genotypes, row_indices, col_indices, block_size=1000)
In [15]:
# The blockwise selection must match the plain NumPy selection exactly.
# np.testing.assert_array_equal still raises AssertionError on a mismatch,
# but reports how the arrays differ; a bare assert on np.array_equal gives
# no detail. (It treats NaNs in matching positions as equal.)
np.testing.assert_array_equal(g, g2)
In [16]: