In [21]:
from __future__ import print_function
import numpy as np
np.random.seed(1)
import scipy.stats
import sys
import cProfile
%reload_ext memory_profiler
# dev imports
sys.path.insert(0, '../src')
%reload_ext autoreload
%autoreload 1
%aimport anhima.sim
%aimport anhima.gt
%aimport anhima.ped

In [2]:
# simulate random biallelic genotypes in a cross with 2 parents and 10 progeny
n_samples = 12
n_variants = 10**6
ploidy = 2
af_dist = scipy.stats.beta(a=.9, b=.1)
p_missing = .1
# progeny totally unrelated to parents, should give plenty of mendel errors
genotypes = anhima.sim.simulate_biallelic_genotypes(n_variants, n_samples, 
                                                    af_dist=af_dist, 
                                                    p_missing=p_missing, 
                                                    ploidy=ploidy)
parents = genotypes[:, :2, :]
progeny = genotypes[:, 2:, :]

In [3]:
me = anhima.ped.diploid_mendelian_error(parents, progeny)
me


Out[3]:
array([[0, 1, 0, ..., 0, 0, 0],
       [0, 1, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ..., 
       [0, 0, 0, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 1, 1],
       [0, 0, 0, ..., 0, 0, 0]], dtype=uint8)

In [4]:
%timeit anhima.ped.diploid_mendelian_error(parents, progeny)


1 loops, best of 3: 447 ms per loop

In [5]:
%memit anhima.ped.diploid_mendelian_error(parents, progeny)


peak memory: 193.02 MiB, increment: 31.57 MiB

In [6]:
cProfile.run('anhima.ped.diploid_mendelian_error(parents, progeny)', sort='time')


         146 function calls in 0.449 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
        5    0.247    0.049    0.247    0.049 necompiler.py:662(evaluate)
        2    0.115    0.058    0.216    0.108 gt.py:704(as_012)
        2    0.041    0.021    0.041    0.021 {method 'reduce' of 'numpy.ufunc' objects}
        2    0.034    0.017    0.034    0.017 gt.py:300(is_het)
        1    0.010    0.010    0.010    0.010 {method 'astype' of 'numpy.ndarray' objects}
        2    0.001    0.001    0.001    0.001 {method 'fill' of 'numpy.ndarray' objects}
        1    0.001    0.001    0.449    0.449 ped.py:123(diploid_mendelian_error)
        5    0.000    0.000    0.000    0.000 necompiler.py:462(getContext)
       21    0.000    0.000    0.000    0.000 {numpy.core.multiarray.array}
        2    0.000    0.000    0.036    0.018 gt.py:486(is_hom_alt)
        2    0.000    0.000    0.029    0.015 gt.py:395(is_hom_ref)
       21    0.000    0.000    0.000    0.000 numeric.py:392(asarray)
        5    0.000    0.000    0.000    0.000 {sorted}
        2    0.000    0.000    0.041    0.021 fromnumeric.py:2048(amax)
       11    0.000    0.000    0.000    0.000 necompiler.py:611(getType)
        1    0.000    0.000    0.449    0.449 <string>:1(<module>)
        2    0.000    0.000    0.041    0.021 _methods.py:15(_amax)
        2    0.000    0.000    0.000    0.000 {numpy.core.multiarray.empty}
        5    0.000    0.000    0.000    0.000 {isinstance}
       10    0.000    0.000    0.000    0.000 {sys._getframe}
        5    0.000    0.000    0.000    0.000 {method 'items' of 'dict' objects}
        5    0.000    0.000    0.000    0.000 {zip}
        5    0.000    0.000    0.000    0.000 {method 'get' of 'dict' objects}
        5    0.000    0.000    0.000    0.000 {method 'copy' of 'dict' objects}
       10    0.000    0.000    0.000    0.000 {method 'pop' of 'dict' objects}
       11    0.000    0.000    0.000    0.000 {method 'append' of 'list' objects}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}



In [25]:
me2 = anhima.ped.diploid_mendelian_error_multiallelic(parents, progeny)
me2


Out[25]:
array([[0, 1, 0, ..., 0, 0, 0],
       [0, 1, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ..., 
       [0, 0, 0, ..., 0, 0, 0],
       [1, 1, 1, ..., 0, 1, 1],
       [0, 0, 0, ..., 0, 0, 0]], dtype=uint8)

In [26]:
np.array_equal(me, me2)


Out[26]:
True

In [27]:
%timeit anhima.ped.diploid_mendelian_error_multiallelic(parents, progeny)


1 loops, best of 3: 1.62 s per loop

In [28]:
%memit anhima.ped.diploid_mendelian_error_multiallelic(parents, progeny)


peak memory: 419.62 MiB, increment: 120.59 MiB

In [29]:
cProfile.run('anhima.ped.diploid_mendelian_error_multiallelic(parents, progeny)', sort='time')


         97 function calls in 1.596 seconds

   Ordered by: internal time

   ncalls  tottime  percall  cumtime  percall filename:lineno(function)
       12    0.982    0.082    0.982    0.082 {method 'reduce' of 'numpy.ufunc' objects}
        1    0.489    0.489    1.596    1.596 ped.py:251(diploid_mendelian_error_multiallelic)
        2    0.092    0.046    0.402    0.201 gt.py:763(as_allele_counts)
        2    0.022    0.011    0.022    0.011 {method 'astype' of 'numpy.ndarray' objects}
        1    0.008    0.008    0.008    0.008 necompiler.py:662(evaluate)
        2    0.003    0.001    0.003    0.001 {numpy.core.multiarray.zeros}
       11    0.000    0.000    0.000    0.000 {numpy.core.multiarray.array}
        5    0.000    0.000    0.440    0.088 fromnumeric.py:1621(sum)
        6    0.000    0.000    0.590    0.098 _methods.py:23(_sum)
        1    0.000    0.000    0.008    0.008 gt.py:117(is_missing)
        1    0.000    0.000    0.000    0.000 necompiler.py:462(getContext)
        6    0.000    0.000    0.000    0.000 {isinstance}
        2    0.000    0.000    0.323    0.161 fromnumeric.py:1842(all)
        2    0.000    0.000    0.031    0.015 fromnumeric.py:1762(any)
        4    0.000    0.000    0.000    0.000 numeric.py:462(asanyarray)
        7    0.000    0.000    0.000    0.000 numeric.py:392(asarray)
        1    0.000    0.000    1.596    1.596 <string>:1(<module>)
        2    0.000    0.000    0.030    0.015 _methods.py:31(_any)
        2    0.000    0.000    0.040    0.020 _methods.py:15(_amax)
        2    0.000    0.000    0.040    0.020 fromnumeric.py:2048(amax)
        2    0.000    0.000    0.323    0.161 _methods.py:35(_all)
        2    0.000    0.000    0.323    0.161 {method 'all' of 'numpy.ndarray' objects}
        2    0.000    0.000    0.031    0.015 {method 'any' of 'numpy.ndarray' objects}
        1    0.000    0.000    0.149    0.149 {method 'sum' of 'numpy.ndarray' objects}
        1    0.000    0.000    0.000    0.000 {sorted}
        1    0.000    0.000    0.000    0.000 {max}
        2    0.000    0.000    0.000    0.000 necompiler.py:611(getType)
        1    0.000    0.000    0.000    0.000 {range}
        1    0.000    0.000    0.000    0.000 {zip}
        2    0.000    0.000    0.000    0.000 {sys._getframe}
        1    0.000    0.000    0.000    0.000 {method 'get' of 'dict' objects}
        1    0.000    0.000    0.000    0.000 {method 'items' of 'dict' objects}
        1    0.000    0.000    0.000    0.000 {method 'disable' of '_lsprof.Profiler' objects}
        2    0.000    0.000    0.000    0.000 {len}
        1    0.000    0.000    0.000    0.000 {method 'copy' of 'dict' objects}
        2    0.000    0.000    0.000    0.000 {method 'pop' of 'dict' objects}
        2    0.000    0.000    0.000    0.000 {method 'append' of 'list' objects}



In [ ]: