anhima.util - Miscellaneous utilities


In [1]:
from __future__ import print_function, division
import sys
import numpy as np
import random
# Plain package import, currently disabled in favour of the dev setup below.
# import anhima
# dev imports
# Put the parent directory at the front of the module search path so that it
# is searched before any other location when anhima is imported below
# (presumably this is the local source checkout - confirm).
sys.path.insert(0, '..')
# IPython autoreload: (re)load the extension, select mode 1 (only modules
# registered with %aimport are reloaded before each execution), then import
# and register anhima.util.
# NOTE(review): later cells also call anhima.gt and anhima.af, which are not
# imported or registered explicitly here; this appears to rely on the package
# importing its own submodules, and edits to those modules would not be
# auto-reloaded in mode 1 - confirm.
%reload_ext autoreload
%autoreload 1
%aimport anhima.util

In [2]:
# Dimensions of the synthetic test array.
n_variants = 100000
n_samples = 1000
# Seed inside the cell so that Restart & Run All, or simply re-running this
# cell, regenerates exactly the same data.
np.random.seed(42)
# Random 0/1 values with shape (n_variants, n_samples, 2), stored as int8.
# Drawing int8 directly avoids materialising a default-integer (typically
# 64-bit) temporary several times larger than the final array before casting.
genotypes = np.random.randint(0, 2, size=(n_variants, n_samples, 2), dtype='i1')
# Show the array shape and its size in megabytes.
genotypes.shape, genotypes.nbytes / 1e6


Out[2]:
((100000, 1000, 2), 200.0)

Block apply


In [3]:
# Reference result: anhima.gt.as_n_alt applied to the whole genotype array in
# a single call.
gn = anhima.gt.as_n_alt(genotypes)

In [4]:
# Same function, this time routed through block_apply with block_size=1000
# (presumably the array is processed in chunks of that many rows - confirm
# against anhima.util).
gn2 = anhima.util.block_apply(
    anhima.gt.as_n_alt,
    genotypes,
    block_size=1000
)

In [5]:
# The block-wise result must match the single-call result element for element.
# assert_array_equal raises AssertionError like a bare assert, but its message
# reports shape mismatches and the differing elements.
np.testing.assert_array_equal(gn, gn2)

In [6]:
# Reference result: anhima.af.allele_counts applied to the whole genotype
# array in a single call.
ac = anhima.af.allele_counts(genotypes)

In [7]:
# Same function, this time routed through block_apply with block_size=1000
# (presumably the array is processed in chunks of that many rows - confirm
# against anhima.util).
ac2 = anhima.util.block_apply(
    anhima.af.allele_counts,
    genotypes,
    block_size=1000
)

In [8]:
# The block-wise allele counts must match the single-call result exactly.
# assert_array_equal raises AssertionError like a bare assert, but its message
# reports shape mismatches and the differing elements.
np.testing.assert_array_equal(ac, ac2)

In [9]:
# Reference result: anhima.gt.pack_diploid applied to the whole genotype array
# in a single call.
packed = anhima.gt.pack_diploid(genotypes)

In [10]:
# Same function, this time routed through block_apply with block_size=1000
# (presumably the array is processed in chunks of that many rows - confirm
# against anhima.util).
packed2 = anhima.util.block_apply(
    anhima.gt.pack_diploid,
    genotypes,
    block_size=1000
)

In [11]:
# The block-wise packed genotypes must match the single-call result exactly.
# assert_array_equal raises AssertionError like a bare assert, but its message
# reports shape mismatches and the differing elements.
np.testing.assert_array_equal(packed, packed2)

Block take2D


In [12]:
# Seed inside the cell so the same rows and columns are chosen on every run,
# including when this cell is re-executed on its own.
random.seed(42)
# Pick one tenth of the row positions and one tenth of the column positions,
# without replacement, and keep each selection in ascending order.
row_indices = sorted(random.sample(range(n_variants), n_variants//10))
col_indices = sorted(random.sample(range(n_samples), n_samples//10))

In [13]:
# Reference result using plain NumPy: select the chosen rows along axis 0,
# then the chosen columns along axis 1.
g = (
    genotypes
    .take(row_indices, axis=0)
    .take(col_indices, axis=1)
)

In [14]:
# Same row/column selection, performed by block_take2d with block_size=1000
# (presumably the array is processed in chunks of that many rows - confirm
# against anhima.util).
g2 = anhima.util.block_take2d(
    genotypes,
    row_indices,
    col_indices,
    block_size=1000
)

In [15]:
# The block-wise selection must match the plain NumPy selection exactly.
# assert_array_equal raises AssertionError like a bare assert, but its message
# reports shape mismatches and the differing elements.
np.testing.assert_array_equal(g, g2)

In [16]: