In [1]:
from ipyparallel import Client
# connect to the running ipcluster and list the engine IDs
cl = Client()
cl.ids
Out[1]:
In [37]:
%%px --local
# run whole cell on all engines as well as in the local IPython session
import numpy # dadi refers to numpy (not np)
import sys
sys.path.insert(0, '/home/claudius/Downloads/dadi')
import dadi
I am going to use unfolded spectra estimated with ANGSD and fold them in dadi.
In [3]:
%%px --local
# import 1D spectrum of ery on all engines:
fs_ery = dadi.Spectrum.from_file('ERY.unfolded.sfs').fold()
# import 1D spectrum of par on all engines:
fs_par = dadi.Spectrum.from_file('PAR.unfolded.sfs').fold()
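As a quick sanity check, the `folded` attribute, the `sample_sizes` and the number of segregating sites `S()` of the dadi Spectrum objects can be inspected (a minimal sketch):
In [ ]:
%%px --local
# sanity check: confirm that both spectra are folded and inspect basic summaries
for name, fs in zip(('ery', 'par'), (fs_ery, fs_par)):
    print name, "folded:", fs.folded, "n:", fs.sample_sizes, "segregating sites:", fs.S()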
In [15]:
%psource dadi.Demographics1D.growth
In [23]:
%%px --local
def expGrowth_bottleneck(params, ns, pts):
    """
    Exponential growth followed by an instantaneous size change.

    params = (nuB,TB,nuF,TF)
    ns = (n1,)

    nuB: Ratio of population size after exponential growth to ancient
         population size
    nuF: Ratio of contemporary to ancient population size
    TB: Time during which exponential growth happened
        (in units of 2*Na generations)
    TF: Time in the past at which the instantaneous size change happened,
        i.e. the length of the final epoch at size nuF (after exp. growth)
    n1: Number of samples in resulting Spectrum
    pts: Number of grid points to use in integration.
    """
    nuB,TB,nuF,TF = params

    xx = dadi.Numerics.default_grid(pts)
    phi = dadi.PhiManip.phi_1D(xx)

    nu_func = lambda t: numpy.exp(numpy.log(nuB) * t/TB)
    phi = dadi.Integration.one_pop(phi, xx, TB, nu_func)
    phi = dadi.Integration.one_pop(phi, xx, TF, nuF)

    fs = dadi.Spectrum.from_phi(phi, ns, (xx,))
    return fs
This model specifies exponential growth/decline toward $\nu_B$ over a time $T_B$, after which the population undergoes an instantaneous size change to the contemporary size $\nu_F$ (both sizes are ratios with $N_{ref}$).
In [24]:
%%px --local
# create reference to the function that specifies the model
func = expGrowth_bottleneck
# create extrapolating version of the model function
func_ex = dadi.Numerics.make_extrap_log_func(func)
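Before any optimisation, the extrapolating model function can be evaluated directly. The following sketch computes the expected spectrum at neutral parameter values and its multinomial log-likelihood against the folded ERY data (`ns` and `pts_l` are defined here locally with the same values used on the engines below):
In [ ]:
# evaluate the model at neutral parameter values and compute its log-likelihood
ns = fs_ery.sample_sizes
pts_l = [40, 50, 60] # grid sizes for extrapolation
model = func_ex([1, 1, 1, 1], ns, pts_l).fold() # fold the model to match the folded data
print dadi.Inference.ll_multinom(model, fs_ery)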
In [33]:
%%px
# set up global variables on engines required for run_dadi function call
dadi_opt_func = dadi.Inference.optimize_log_fmin # uses Nelder-Mead algorithm
sfs = fs_ery # use ERY spectrum
perturb = True
fold = 2 # perturb randomly by up to a factor of 2**2 = 4
maxiter = 100 # run a maximum of 100 iterations
verbose = 0
full_output = True # need to have full output to get the warnflags (see below)
outname = "OUT_expGrowth_bottleneck/ERY_perturb" # set file name stub for opt. result files
# set lower and upper bounds to nu and T
upper_bound = [1e4, 5, 1e4, 5]
lower_bound = [1e-4, 0, 1e-4, 0]
ns = fs_ery.sample_sizes # both populations have the same sample size
fs_ery.pop_ids = ['ery']
fs_par.pop_ids = ['par']
# setting the smallest grid size slightly larger than the largest population sample size (36)
pts_l = [40, 50, 60]
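Note that with `fold=2`, `dadi.Misc.perturb_params` multiplies each parameter by up to $2^{fold} = 4$ in either direction. This can be checked directly (a minimal sketch):
In [ ]:
# each parameter is perturbed by up to a factor of 2**fold = 4
print dadi.Misc.perturb_params([1, 1, 1, 1], fold=2,
                               lower_bound=[1e-4, 0, 1e-4, 0], upper_bound=[1e4, 5, 1e4, 5])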
In [26]:
# create a load-balanced view on the engines
lbview = cl.load_balanced_view()
In [27]:
def run_dadi(p_init): # for the function to be called with map, it needs to have one input variable
    """
    p_init: initial parameter values to run optimisation from
    """
    if perturb:
        p_init = dadi.Misc.perturb_params(p_init, fold=fold,
                                          upper_bound=upper_bound, lower_bound=lower_bound)
        # note: upper_bound and lower_bound variables are expected to be in the namespace of each engine
    # run optimisation of parameters
    popt = dadi_opt_func(p0=p_init, data=sfs, model_func=func_ex, pts=pts_l, \
                         lower_bound=lower_bound, upper_bound=upper_bound, \
                         verbose=verbose, maxiter=maxiter, full_output=full_output)
    # pickle to file
    import dill
    name = outname[:] # make copy of file name stub!
    for p in p_init:
        name += "_%.4f" % (p)
    with open(name + ".dill", "w") as fh:
        dill.dump((p_init, popt), fh)
    return p_init, popt
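For reference, run_dadi encodes the (perturbed) starting values in the name of the .dill file it writes. A hypothetical example of the file name produced for a made-up set of starting values:
In [ ]:
# hypothetical illustration of the output file name generated by run_dadi
name = "OUT_expGrowth_bottleneck/ERY_perturb"
for p in [1.2345, 0.5, 2.0, 0.1]:
    name += "_%.4f" % (p)
print name + ".dill" # OUT_expGrowth_bottleneck/ERY_perturb_1.2345_0.5000_2.0000_0.1000.dill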
In [28]:
from itertools import repeat
In [38]:
# perturb neutral parameter values
p0 = [1, 1, 1, 1]
ar_ery_1 = lbview.map(run_dadi, repeat(p0, 10), block=False, order=True)
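The map call returns immediately with an AsyncMapResult object. Its standard ipyparallel attributes `ready()`, `progress` and `elapsed` can be used to monitor the runs while they execute:
In [ ]:
# monitor the asynchronous optimisation runs
print ar_ery_1.ready(), ar_ery_1.progress, "of 10 tasks finished, %.0f s elapsed" % ar_ery_1.elapsed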
In [40]:
def get_flag_count(out, NM=True):
    """
    out: list of tuples, each containing p_init and popt + additional info, including warnflags
    as produced by run_dadi (above)
    """
    from collections import defaultdict

    if NM: # if out is from a Nelder-Mead optimisation
        i = 4 # the warnflag is reported at index position 4 in the output array
    else: # out is from a BFGS optimisation
        i = 6

    warnflag = defaultdict(int)

    for res in out:
        if res[1][i] == 1: # res[1] is popt, the full output tuple of the optimiser
            warnflag[1] += 1
        elif res[1][i] == 2:
            warnflag[2] += 1
        elif res[1][i] == 0:
            warnflag[0] += 1
        else:
            warnflag[999] += 1
    if NM:
        print "success", warnflag[0]
        print "Maximum number of function evaluations made.", warnflag[1]
        print "Maximum number of iterations reached.", warnflag[2]
        print "unknown flag", warnflag[999]
    else:
        print "success", warnflag[0]
        print "Maximum number of iterations exceeded.", warnflag[1]
        print "Gradient and/or function calls not changing.", warnflag[2]
        print "unknown flag", warnflag[999]
In [41]:
get_flag_count(ar_ery_1, NM=True)
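The index positions used in get_flag_count follow the full_output return tuple of scipy.optimize.fmin, which dadi's optimize_log_fmin wraps: (xopt, fopt, iter, funcalls, warnflag). A sketch unpacking the first result:
In [ ]:
# popt follows scipy.optimize.fmin's full_output tuple: (xopt, fopt, iter, funcalls, warnflag)
p_init, popt = ar_ery_1[0]
print "perturbed start values:", p_init
print "optimal parameters:", popt[0]
print "-logL:", popt[1]
print "warnflag:", popt[4]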
In [42]:
def flatten(array):
    """
    Returns a list of the flattened elements of every inner list (or tuple).
    ****RECURSIVE****
    """
    import numpy

    res = []
    for el in array:
        if isinstance(el, (list, tuple, numpy.ndarray)):
            res.extend(flatten(el))
            continue
        res.append(el)
    return res
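A quick usage example on a nested structure like the (p_init, popt) tuples returned by run_dadi:
In [ ]:
print flatten( ([1, 2], (3, [4, 5]), 6) )
# [1, 2, 3, 4, 5, 6]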
In [51]:
success = [flatten(out)[:9] for out in ar_ery_1 if out[1][4] == 0]
In [52]:
import pandas as pd
In [53]:
df = pd.DataFrame(data=success, \
                  columns=['nuB_0','TB_0', 'nuF_0', 'TF_0', 'nuB_opt', 'TB_opt', 'nuF_opt', 'TF_opt', '-logL'])
df.sort_values(by='-logL', ascending=True)
Out[53]:
In [54]:
print success[0][4:8]
In [55]:
# perturb previous optimal parameter combination
p0 = success[0][4:8]
ar_ery_1 = lbview.map(run_dadi, repeat(p0, 10), block=False, order=True)
In [56]:
get_flag_count(ar_ery_1, NM=True)
In [57]:
success = [flatten(out)[:9] for out in ar_ery_1 if out[1][4] == 0]
df = pd.DataFrame(data=success, \
                  columns=['nuB_0','TB_0', 'nuF_0', 'TF_0', 'nuB_opt', 'TB_opt', 'nuF_opt', 'TF_opt', '-logL'])
df.sort_values(by='-logL', ascending=True)
Out[57]:
The optimised parameter values do not deviate very much from the initial parameter values.
In [58]:
# specify the initial parameter values, they will be randomly perturbed by up to a factor of 4
p0 = [100, 0.01, 0.1, 1] # quick exp. growth, then long period of small size
ar_ery_2 = lbview.map(run_dadi, repeat(p0, 10), block=False, order=True)
In [60]:
success = [flatten(out)[:9] for out in ar_ery_2 if out[1][4] == 0]
df = pd.DataFrame(data=success, \
                  columns=['nuB_0','TB_0', 'nuF_0', 'TF_0', 'nuB_opt', 'TB_opt', 'nuF_opt', 'TF_opt', '-logL'])
df.sort_values(by='-logL', ascending=True)
Out[60]:
In [61]:
# specify the initial parameter values, they will be randomly perturbed by up to a factor of 4
p0 = [0.01, 0.01, 10, 1] # quick exp. decline, then long period of increased size
ar_ery_3 = lbview.map(run_dadi, repeat(p0, 10), block=False, order=True)
In [62]:
success = [flatten(out)[:9] for out in ar_ery_3 if out[1][4] == 0]
df = pd.DataFrame(data=success, \
                  columns=['nuB_0','TB_0', 'nuF_0', 'TF_0', 'nuB_opt', 'TB_opt', 'nuF_opt', 'TF_opt', '-logL'])
df.sort_values(by='-logL', ascending=True)
Out[62]:
In all successful optimisations above, $\nu_F$, the ratio of contemporary population size to ancient population size, converges to a value below 1/3.
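To put the best-fit parameters into context, the optimal scaling factor $\theta$ for a parameter combination can be computed with dadi.Inference.optimal_sfs_scaling (a sketch; `ns` and `pts_l` are defined here locally with the values used on the engines):
In [ ]:
# compute the optimally scaled theta for the first successful parameter combination
ns = fs_ery.sample_sizes
pts_l = [40, 50, 60]
model = func_ex(success[0][4:8], ns, pts_l).fold() # fold the model to match the folded data
print "theta:", dadi.Inference.optimal_sfs_scaling(model, fs_ery)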
In [63]:
# perturb previous optimal parameter combination
p0 = success[0][4:8]
ar_ery_4 = lbview.map(run_dadi, repeat(p0, 10), block=False, order=True)
In [64]:
get_flag_count(ar_ery_4, NM=True)
In [65]:
success = [flatten(out)[:9] for out in ar_ery_4 if out[1][4] == 0]
df = pd.DataFrame(data=success, \
                  columns=['nuB_0','TB_0', 'nuF_0', 'TF_0', 'nuB_opt', 'TB_opt', 'nuF_opt', 'TF_opt', '-logL'])
df.sort_values(by='-logL', ascending=True)
Out[65]:
In [73]:
# save optimisation results to file
import dill
optout = []
for ar_ery in (ar_ery_1, ar_ery_2, ar_ery_3, ar_ery_4):
    optout.extend(list(ar_ery.get()))
dill.dump(optout, open("OUT_expGrowth_bottleneck/ERY_perturb_ar_ery.dill", "w"))
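The pickled results can be read back in a later session (a minimal sketch):
In [ ]:
# reload the saved optimisation results
optout = dill.load(open("OUT_expGrowth_bottleneck/ERY_perturb_ar_ery.dill"))
print len(optout) # should be 40 (4 batches of 10 runs)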
In [74]:
%%px
# set up global variables on engines required for run_dadi function call
dadi_opt_func = dadi.Inference.optimize_log_fmin # uses Nelder-Mead algorithm
sfs = fs_par # use PAR spectrum
perturb = True
fold = 2 # perturb randomly by up to a factor of 2**2 = 4
maxiter = 100 # run a maximum of 100 iterations
verbose = 0
full_output = True # need to have full output to get the warnflags (see below)
outname = "OUT_expGrowth_bottleneck/PAR_perturb" # set file name stub for opt. result files
In [75]:
p0 = [1, 1, 1, 1]
ar_par_1 = lbview.map(run_dadi, repeat(p0, 10), block=False, order=False)
In [76]:
%ll OUT_expGrowth_bottleneck/PAR*
In [77]:
# collect the results that run_dadi pickled to file
ar_par_1 = []
import glob
for filename in glob.glob("OUT_expGrowth_bottleneck/PAR*"):
    ar_par_1.append(dill.load(open(filename)))
In [78]:
get_flag_count(ar_par_1, NM=True)
In [79]:
success = [flatten(out)[:9] for out in ar_par_1 if out[1][4] == 0]
df = pd.DataFrame(data=success, \
                  columns=['nuB_0','TB_0', 'nuF_0', 'TF_0', 'nuB_opt', 'TB_opt', 'nuF_opt', 'TF_opt', '-logL'])
df.sort_values(by='-logL', ascending=True)
Out[79]:
In [82]:
success[1][4:8]
Out[82]:
In [83]:
p0 = success[1][4:8]
ar_par_2 = lbview.map(run_dadi, repeat(p0, 10), block=False, order=False)
In [84]:
get_flag_count(ar_par_2, NM=True)
In [85]:
success = [flatten(out)[:9] for out in ar_par_2 if out[1][4] == 0]
df = pd.DataFrame(data=success, \
                  columns=['nuB_0','TB_0', 'nuF_0', 'TF_0', 'nuB_opt', 'TB_opt', 'nuF_opt', 'TF_opt', '-logL'])
df.sort_values(by='-logL', ascending=True)
Out[85]:
Again, note that the inferred optimal parameter values deviate only slightly from the initial values and that all optimal parameter combinations have the same likelihood.
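The quality of the fit can be inspected visually with dadi.Plotting.plot_1d_comp_multinom, which plots the optimally scaled model spectrum against the data together with the residuals (a sketch, assuming inline matplotlib plotting is enabled):
In [ ]:
# compare the best-fit model spectrum with the PAR data
best = df.sort_values(by='-logL').iloc[0, 4:8] # best parameter combination from the table above
model = func_ex(list(best), fs_par.sample_sizes, [40, 50, 60]).fold()
dadi.Plotting.plot_1d_comp_multinom(model, fs_par)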
In [86]:
p0 = [0.01, 0.01, 10, 1] # quick exp. decline, then long period of increased size
ar_par_3 = lbview.map(run_dadi, repeat(p0, 10), block=False, order=False)
In [87]:
get_flag_count(ar_par_3, NM=True)
The initial parameter values specified above (and then perturbed) are apparently too far from an optimum for any of the optimisations to converge.
In [94]:
optout = []
for ar_par in (ar_par_1, list(ar_par_2.get())):
    optout.extend(ar_par)
dill.dump(optout, open("OUT_expGrowth_bottleneck/PAR_perturb_ar_par.dill", "w"))
In [ ]: