In [1]:
# load dadi module
import sys
sys.path.insert(0, '/home/claudius/Downloads/dadi')
import dadi
In [2]:
%ll
In [3]:
%ll dadiExercises
In [2]:
# import 1D spectrum of ery
fs_ery = dadi.Spectrum.from_file('dadiExercises/ERY.FOLDED.sfs.dadi_format')
# import 1D spectrum of par
fs_par = dadi.Spectrum.from_file('dadiExercises/PAR.FOLDED.sfs.dadi_format')
In [3]:
import pylab
In [4]:
%matplotlib inline
In [5]:
pylab.rcParams['figure.figsize'] = [12.0, 10.0]
In [6]:
pylab.plot(fs_par, 'go-', label='par')
pylab.plot(fs_ery, 'rs-', label='ery')
pylab.xlabel('minor allele frequency')
pylab.ylabel('SNP count')
pylab.legend()
Out[6]:
Does estimating an unfolded spectrum with ANGSD and then folding yield a sensible folded SFS when the sites are not polarised with respect to an ancestral allele but with respect to the reference allele? Matteo Fumagalli thinks that this is sensible.
In [9]:
! cat ERY.unfolded.sfs
In [10]:
! cat PAR.unfolded.sfs
In [2]:
# import 1D unfolded spectrum of ery
fs_ery_unfolded = dadi.Spectrum.from_file('ERY.unfolded.sfs')
# import 1D unfolded spectrum of par
fs_par_unfolded = dadi.Spectrum.from_file('PAR.unfolded.sfs')
In [12]:
pylab.plot(fs_ery, 'ro-', label='folded by ANGSD')
pylab.plot(fs_ery_unfolded.fold(), 'bs-', label='folded by ' + r'$\delta$a$\delta$i')
pylab.xlabel('minor allele frequency')
pylab.ylabel('SNP count')
pylab.legend()
Out[12]:
In [13]:
pylab.plot(fs_par, 'go-', label='folded by ANGSD')
pylab.plot(fs_par_unfolded.fold(), 'bs-', label='folded by ' + r'$\delta$a$\delta$i')
pylab.xlabel('minor allele frequency')
pylab.ylabel('SNP count')
pylab.legend()
Out[13]:
For parallelus, the spectrum folded by dadi looks better than the spectrum folded by ANGSD. I am therefore going to use spectra folded in dadi.
In [3]:
fs_ery = fs_ery_unfolded.fold()
fs_par = fs_par_unfolded.fold()
In [15]:
# create link to built-in model function
func = dadi.Demographics1D.snm
# make the extrapolating version of the demographic model function
func_ex = dadi.Numerics.make_extrap_log_func(func)
In [16]:
# setting the smallest grid size slightly larger than the largest population sample size
pts_l = [40, 50, 60]
ns = fs_ery.sample_sizes
ns
Out[16]:
In [17]:
# calculate unfolded AFS under standard neutral model (up to a scaling factor theta)
neutral_model = func_ex(0, ns, pts_l)
neutral_model
Out[17]:
In [18]:
theta_ery = dadi.Inference.optimal_sfs_scaling(neutral_model, fs_ery)
theta_ery
Out[18]:
In [19]:
theta_par = dadi.Inference.optimal_sfs_scaling(neutral_model, fs_par)
theta_par
Out[19]:
What effective ancestral population size would that imply?
According to section 5.2 in the dadi manual:
$$ \theta = 4 N_{ref}\, \mu_{L} \qquad \text{L: sequence length} $$

Then

$$ \mu_{L} = \mu_{site} \times L $$

So

$$ \theta = 4 N_{ref}\, \mu_{site} \times L $$

and

$$ N_{ref} = \frac{\theta}{4 \mu_{site} L} $$

Let's assume the mutation rate per nucleotide site per generation is $3\times 10^{-9}$ (see e. g. Liu2017).
In [20]:
mu = 3e-9
L = fs_ery.data.sum() # this sums over all entries in the spectrum, including masked ones, i. e. also contains invariable sites
print "The total sequence length for the ery spectrum is {0:,}.".format(int(L))
N_ref_ery = theta_ery/L/mu/4
print "The effective ancestral population size of ery (in number of diploid individuals) implied by this theta is: {0:,}.".format(int(N_ref_ery))
In [21]:
mu = 3e-9
L = fs_par.data.sum() # this sums over all entries in the spectrum, including masked ones, i. e. also contains invariable sites
print "The total sequence length for par spectrum is {0:,}.".format(int(L))
N_ref_par = theta_par/L/mu/4
print "The effective ancestral population size of par (in number of diploid individuals) implied by this theta is: {0:,}.".format(int(N_ref_par))
These effective population sizes are consistent with those reported for other insect species (Lynch2016, fig. 3b).
In dadi, times are given in units of $2N_{ref}$ generations.
In [22]:
# compare neutral model prediction with ery spectrum
dadi.Plotting.plot_1d_comp_multinom(neutral_model.fold()[:19], fs_ery[:19], residual='linear')
In [23]:
# compare neutral model prediction with par spectrum
dadi.Plotting.plot_1d_comp_multinom(neutral_model.fold()[:19], fs_par[:19], residual='linear')
The lower plot (green line) shows the scaled Poisson residuals:

$$ residuals = \frac{model - data}{\sqrt{model}} $$

The model gives the expected counts in each frequency class. If these counts are Poisson distributed, then their variance is equal to their expectation. The differences between model and data are therefore scaled by the expected standard deviation of the model counts.
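As a minimal sketch of this calculation, assuming the model spectrum is first scaled by the optimal $\theta$ (as in the multinomial comparison above):

```python
import numpy as np

# minimal sketch: linearly scaled Poisson residuals of the neutral model fit;
# the model spectrum is first scaled by the optimal theta
scaled_model = dadi.Inference.optimally_scaled_sfs(neutral_model.fold(), fs_ery)
residuals = (scaled_model - fs_ery) / np.sqrt(scaled_model)
```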
A standard neutral model is of course unrealistic for either population, but the observed counts deviate from it by up to 90 standard deviations!
What could be done about this?
The greatest deviations are seen for the first two frequency classes, the ones that should provide the greatest amount of information for theta (Fu1994) and therefore probably also other parameters. I think masking out just one of the first two frequency classes will lead to highly biased inferences. Masking both frequency classes will reduce a lot of the power to infer the demographic history. I therefore think that masking will most likely not lead to better estimates.
Toni has suggested that the doubleton class is inflated due to "miscalling" heterozygotes as homozygotes. A heterozygote containing a singleton will then be "called" as a homozygote and thereby contribute to the doubleton count. This is aggravated by the fact that the sequenced individuals are all male and therefore possess only one X chromosome. The X chromosome is the fourth largest of the 9 chromosomes of these grasshoppers (8 autosomes + X) (see Gosalvez1988, fig. 2). That is, about 1/9th of the sequenced RAD loci are haploid, but ANGSD assumes all loci to be diploid. The genotype likelihoods it calculates all refer to diploid genotypes.
I think one potential reason for the extreme deviations is that the genotype likelihoods are generally biased toward homozygote genotypes (i. e. also for autosomal loci) due to PCR duplicates (see eq. 1 in Nielsen2012). So, one potential improvement would be to remove PCR duplicates.
Another potential improvement could come from subsampling 8/9th to 8/10th of the contigs in the SAF files and estimating an SFS from each subsample. Given enough subsamples, one should eventually be found that maximally excludes loci from the X chromosome. This subsample is expected to produce the smallest squared deviations from an expected SFS under the standard neutral model. However, one could argue that this attempt to exclude problematic loci could also inadvertently remove loci that strongly deviate from neutral expectations due to non-neutral evolution, again reducing the power to detect deviations from the standard neutral model. Alternatively, one could only require that the second MAF class be lower than the first and keep all contig subsamples and SFS's that fulfill that criterion, since that should hold under any demographic scenario. This approach, however, is compute intensive and difficult to implement.
Ludovic has suggested optimising for the fraction $p$ of all SNP's that lie on the X chromosome. SNP's on the X chromosome are counted twice in the SFS. This fraction $p$ should therefore be subtracted from even frequency classes and added to the frequency class containing SNP's that are half as frequent, e. g. from class 2 to class 1 or from class 8 to class 4. One could then optimise $p$ for the minimum deviation from a neutral spectrum.
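A minimal sketch of what this correction could look like (a hypothetical helper, not code that was actually run):

```python
def correct_X_inflation(fs, p):
    """Hypothetical sketch: move a fraction p of the SNP's in each even
    minor allele frequency class i to class i/2, undoing the count
    doubling expected for X-linked (haploid) loci genotyped as diploid."""
    corrected = fs.copy()
    n = int(fs.sample_sizes[0]) / 2  # highest minor allele frequency class
    for i in range(2, n+1, 2):
        moved = p * fs[i]
        corrected[i] -= moved
        corrected[i/2] += moved
    return corrected

# p could then be optimised for minimal deviation from a neutral spectrum
```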
Finally, it may be possible to acquire the Locusta genome contigs scaffolded or annotated by linkage group (see Wang2014). One could then blast-annotate the RAD contigs with the homologous Locusta linkage group and create subsets of RAD contigs that each exclude one linkage group. There may be a subset (presumably the one that excludes RAD contigs from the X chromosome) with markedly reduced residuals when compared to a neutral spectrum.
This model assumes that exponential growth (or decline) started some time $T$ in the past and that the current effective population size is a multiple $\nu$ of the ancient population size, i. e. the size before exponential growth began. So this model estimates just two parameters. If $\nu$ is estimated to be 1, this indicates that the population size hasn't changed (although see Myers2008). If it is below 1, this indicates exponential population decline (how realistic is that?). If it is above 1, this indicates exponential population growth.
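The optimisation results are loaded from file below. For reference, a minimal sketch of what one such optimisation run could have looked like (the perturbation fold and bounds shown here are illustrative assumptions, not necessarily the values actually used):

```python
# minimal sketch of a single Nelder-Mead optimisation run for the built-in
# exponential growth model; fold and bounds are illustrative assumptions
func = dadi.Demographics1D.growth  # params: (nu, T)
func_ex = dadi.Numerics.make_extrap_log_func(func)
p0 = dadi.Misc.perturb_params([1, 1], fold=2)  # perturb neutral start values
out = dadi.Inference.optimize_log_fmin(p0, fs_ery, func_ex, pts_l,
                                       lower_bound=[1e-4, 1e-4],
                                       upper_bound=[1e4, 5],
                                       verbose=0, full_output=True)
```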
In [24]:
import dill
# loading optimisation results from previous analysis
growth_ery = dill.load(open('OUT_exp_growth_model/ERY_perturb_ar_ery.dill'))
In [25]:
def flatten(array):
    """
    Returns a list of the flattened elements of all inner lists (or tuples).
    ****RECURSIVE****
    """
    import numpy
    res = []
    for el in array:
        if isinstance(el, (list, tuple, numpy.ndarray)):
            res.extend(flatten(el))
            continue
        res.append(el)
    return list(res)
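For example:

```python
flatten([1, (2, 3), [4, [5, 6]]])  # returns [1, 2, 3, 4, 5, 6]
```

This is used below to turn dadi's nested optimisation output into flat rows for a pandas DataFrame.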
In [26]:
import pandas as pd
In [27]:
i = 4 # where to find flag, 6 for BFGS, 4 for Nelder-Mead
successfull_popt_ery = [flatten(out)[:5] for out in growth_ery if out[1][i] == 0]
df = pd.DataFrame(data=successfull_popt_ery, \
columns=['nu_0', 'T_0', 'nu_opt', 'T_opt', '-logL'])
df.sort_values(by='-logL', ascending=True) # smaller is better
Out[27]:
In [28]:
print "The most likely parameter combination for this model suggests that the ery population size started to exponentially shrink at {0:,} generations ago to a current population size of {1:,}.".format(int(0.00769*2*N_ref_ery), int(0.14929*N_ref_ery))
This optimal parameter set was difficult to find: it is only recovered from starting values that are already quite close to the optimal values. The model enforces exponential population decline, which may not be very realistic.
A sweep through the parameter space did not lead to convergence of parameters for the parallelus spectrum. Optimisations starting from neutral parameter values (1, 1) randomly perturbed by up to 6-fold also did not converge. The exponential growth/decline model could therefore not be fit to the par spectrum.
The built-in `two_epoch` model defines a piecewise history: at some time $T$ in the past, the ancestral population instantaneously changed to the contemporary population size, which has a ratio of $\nu$ to the ancestral population size.
The parameter values with the highest likelihood ($\nu=0.0001$ and $T=0.000002$) hit the lower bounds of the parameter space that I had set. The time parameter $T$ that dadi infers with the highest likelihood corresponds to 3 generations and therefore does not make sense. The two epoch model could therefore not be fit to the erythropus spectrum.
In [29]:
ar_par_te = dill.load(open("OUT_two_epoch/PAR_perturb_ar_par.dill"))
In [30]:
i = 4 # index of flag with NM algorithm
successfull_popt = [flatten(out)[:5] for out in ar_par_te if out[1][i] == 0]
df = pd.DataFrame(data=successfull_popt, \
columns=['nu_0','T_0', 'nu_opt', 'T_opt', '-logL'])
df.sort_values(by='-logL', ascending=True)
Out[30]:
Very different population size histories have the same likelihood. The most likely parameter combinations hit the upper bound that I set for the time parameter (6).
In [31]:
print "A T of 6 corresponds to {0:,} generations.".format(6*2*N_ref_par)
From the dadi manual:
If your fits often push the bounds of your parameter space (i.e., results are often at the bounds of one or more parameters), this indicates a problem. It may be that your bounds are too conservative, so try widening them. It may also be that your model is misspecified or that there are unaccounted biases in your data.
Ryan Gutenkunst in a dadi forum thread on parameters hitting the boundary:
This indicates that dadi is having trouble fitting your data. One possibility is that the history of the population includes important events that aren’t in your models. Another possibility is that your data is biased in ways that aren’t in your models. For example, maybe your missing calls for rare alleles.
I am not sure whether an even higher $T$ can reasonably be assumed. The expected time to the most recent common ancestor (MRCA) in a neutral genealogy is:

$$ E\big[T_{MRCA}\big] = 2 \Big(1-\frac{1}{n}\Big) $$

measured in $N_e$ generations (for the coalescent time unit see p. 66, 92 in Wakeley2009). Note that $T_{MRCA}$ is close to its large sample size limit of 2 already for moderate sample sizes. In order to put dadi's time parameter on the coalescent time scale, it needs to be multiplied by 2. A $T$ of 6 from dadi therefore corresponds to a $T$ of 12 on the coalescent time scale, which should be completely beyond the possible $T_{MRCA}$ of any genealogy.
See figure 3.4, p. 79, in Wakeley2009 for the distribution of the $T_{MRCA}$. For $n=36$ it has a mode close to 1.2 and an expected value of 1.94. Values for $T_{MRCA}$ greater than 4 are very unlikely given a standard coalescent model, but may be more likely under models including population expansion or gene flow from another population.
I think a two epoch model cannot be fit to the parallelus spectrum.
The built-in `bottlegrowth` model specifies an instantaneous size change $T \times 2N_{ref}$ generations in the past, immediately followed by the start of exponential growth (or decline) toward the contemporary population size. The model has three parameters: $\nu_B$, the ratio of the population size directly after the instantaneous change to the ancient population size; $\nu_F$, the ratio of the contemporary to the ancient population size; and $T$, the time of the instantaneous size change (in units of $2N_{ref}$ generations).
In [32]:
ar_ery = dill.load(open("OUT_bottlegrowth/ERY_perturb_ar_ery.dill"))
In [33]:
successfull_popt = [flatten(out)[:7] for out in ar_ery if out[1][4] == 0]
In [34]:
df = pd.DataFrame(data=successfull_popt, columns=['nuB_0', 'nuF_0', 'T_0', 'nuB_opt', 'nuF_opt', 'T_opt', '-logL'])
In [35]:
df.sort_values(by='-logL', ascending=True) # smaller is better
Out[35]:
These optimal parameter values were found by a broad sweep across the parameter space (data not shown).
The optimal parameters suggest the following:
In [36]:
print "At time {0:,} generations ago, the ery population size instantaneously increased by almost 40-fold (to {1:,}).".format(int(0.36794*2*N_ref_ery), int(39.2*N_ref_ery))
Can a short-term effective population size of 21 million reasonably be assumed?
In [37]:
print "This was followed by exponential decline towards a current population size of {0:,}.".format(int(0.4576*N_ref_ery))
There was no convergence on a set of optimal parameter values.
The built-in `three_epoch` model specifies a piecewise history (with only instantaneous population size changes instead of gradual changes). At time $TB+TF$ in the past, the ancient population underwent a size change, stayed at this size ($\nu_B \times N_{ref}$) for $TB \times 2N_{ref}$ generations and then underwent a second size change $TF \times 2N_{ref}$ generations in the past, to the contemporary population size ($\nu_F \times N_{ref}$). The model therefore has two population size parameters, $\nu_B$ and $\nu_F$, as well as two time parameters, $TB$ and $TF$.
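As a minimal sketch (the parameter values here are purely illustrative), a single model spectrum and its log likelihood can be computed with the built-in function like this:

```python
# minimal sketch: evaluate the built-in three_epoch model for one purely
# illustrative parameter combination (nuB, nuF, TB, TF)
func_ex = dadi.Numerics.make_extrap_log_func(dadi.Demographics1D.three_epoch)
model = func_ex([1.5, 0.25, 0.5, 0.3], fs_ery.sample_sizes, pts_l)
ll = dadi.Inference.ll_multinom(model.fold(), fs_ery)
```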
With 4 parameters, this model is already so complex that a sweep through the parameter space (i. e. starting optimisations from many different positions) is prohibitively time-consuming. I have therefore started optimisations with initial parameter values that were generated by randomly perturbing a neutral parameter combination, $\nu_B = \nu_F = TB = TF = 1$, by up to 4-fold. I can therefore not be certain whether the optimal parameter combinations found this way correspond to the global optimum or just to a local optimum of the likelihood function.
In [38]:
# load optimisation results from file
ar_ery = []
import glob
for filename in glob.glob("OUT_three_epoch/ERY_perturb_ar_ery*.dill"):
ar_ery.extend(dill.load(open(filename)))
In [39]:
# extract successful (i. e. converged) optimisations
successfull_popt = [flatten(out)[:9] for out in ar_ery if out[1][4] == 0]
# create data frame
df = pd.DataFrame(data=successfull_popt, \
columns=['nuB_0', 'nuF_0', 'TB_0', 'TF_0', 'nuB_opt', 'nuF_opt', 'TB_opt', 'TF_opt', '-logL'])
# sort data frame by negative log likelihood
df.sort_values(by='-logL', ascending=True) # smaller is better
Out[39]:
Note that all but the last parameter combination have the same likelihood. Fairly different parameter combinations have practically identical likelihood. A reduction of the contemporary population size to 1/4 of the ancient population size is quite consistently inferred ($\nu_F$). The ancient population size change ($\nu_B$) is not inferred consistently: it ranges from a 186-fold increase to a reduction to 1/3 of the ancient population size.
In [40]:
# remove one parameter combination with slightly lower logL than the others
df = df.sort_values(by='-logL').head(-1)
In [41]:
# the time of the ancient pop size change is TB+TF
df['TB+TF'] = pd.Series(df['TB_opt']+df['TF_opt'])
In [42]:
# extract columns from table
nuB = df.loc[:,'nuB_opt']
nuF = df.loc[:, 'nuF_opt']
Tb_Tf = df.loc[:, 'TB+TF']
TF = df.loc[:, 'TF_opt']
In [43]:
# turn nu (a ratio) into absolute Ne and T into generations
nuB_n = nuB*N_ref_ery
nuF_n = nuF*N_ref_ery
Tb_Tf_g = Tb_Tf*2*N_ref_ery
TF_g = TF*2*N_ref_ery
In [44]:
anc = [N_ref_ery] * len(nuB) # ancestral pop size
pres = [1] * len(nuB) # 1 generation in the past
past = [max(Tb_Tf_g)+1000] * len(nuB) # furthest time point in the past
In [45]:
pylab.rcParams['figure.figsize'] = [12.0, 10.0]
pylab.rcParams['font.size'] = 12.0
# plot the best 21 parameter combinations
for x, y in zip(zip(past, Tb_Tf_g, Tb_Tf_g, TF_g, TF_g, pres), zip(anc, anc, nuB_n, nuB_n, nuF_n, nuF_n)):
pylab.semilogy(x, y)
pylab.xlabel('generations in the past')
pylab.ylabel('effective population size')
Out[45]:
This plot visualises the range of stepwise population size histories that the above parameter combinations imply (all with a negative log likelihood of 2168.11186). Most parameter combinations infer an ancient population size increase followed by a drastic population size collapse to less than 1/3 of the ancient population size that happened more than 400,000 generations ago.
Are the strength of the population size expansion, $(\nu_B)^{TB}$, and the strength of the population size reduction, $(\frac{1}{\nu_F})^{TF}$, correlated with each other?
In [46]:
pylab.plot(df['nuB_opt']**df['TB_opt'], (1.0/df['nuF_opt'])**df['TF_opt'], 'bo')
pylab.xlabel('strength of first size change')
pylab.ylabel('strength of second size change')
Out[46]:
If a long period of increased population size were correlated with a long period of decreased population size, this plot would show a positive correlation. That does not seem to be the case.
In [47]:
import dill
ar_par = dill.load(open("OUT_three_epoch/PAR_perturb_ar_par.dill"))
ar_par_extreme = dill.load(open("OUT_three_epoch/PAR_perturb_extreme_ar_par.dill"))
In [48]:
# add new output to previous output
successfull_popt = [flatten(out)[:9] for out in ar_par if out[1][4] == 0]
successfull_popt.extend([flatten(out)[:9] for out in ar_par_extreme if out[1][4] == 0])
# create data frame
df = pd.DataFrame(data=successfull_popt, \
columns=['nuB_0', 'nuF_0', 'TB_0', 'TF_0', 'nuB_opt', 'nuF_opt', 'TB_opt', 'TF_opt', '-logL'])
# sort data frame by optimal nuB to show the range of inferred histories
df.sort_values(by='nuB_opt', ascending=True)
Out[48]:
As can be seen, extremely different population size histories have the same likelihood.
In [49]:
# add time for ancient size change
df['TB+TF'] = pd.Series(df['TB_opt']+df['TF_opt'])
# extract columns from table
nuB = df.loc[:,'nuB_opt']
nuF = df.loc[:, 'nuF_opt']
Tb_Tf = df.loc[:, 'TB+TF']
TF = df.loc[:, 'TF_opt']
# turn nu into absolute Ne and T into generations
nuB_n = nuB*N_ref_par
nuF_n = nuF*N_ref_par
Tb_Tf_g = Tb_Tf*2*N_ref_par
TF_g = TF*2*N_ref_par
# auxiliary
anc = [N_ref_par] * len(nuB)
pres = [1] * len(nuB)
past = [max(Tb_Tf_g)+1000] * len(nuB)
pylab.rcParams['figure.figsize'] = [12.0, 10.0]
pylab.rcParams['font.size'] = 12.0
for x, y in zip(zip(past, Tb_Tf_g, Tb_Tf_g, TF_g, TF_g, pres), zip(anc, anc, nuB_n, nuB_n, nuF_n, nuF_n)):
pylab.semilogy(x, y)
pylab.xlabel('generations in the past')
pylab.ylabel('effective population size')
Out[49]:
All parameter combinations consistently show a reduction in the contemporary population size with respect to the ancient population size ($\nu_F$). The ancient population size change is much less clear: it ranges from a 300-fold increase to a 100-fold reduction. This event is also inferred to have occurred quite far in the past, generally much more than 1 million generations ago.
This model (exponential growth followed by a bottleneck) specifies exponential growth/decline toward $\nu_B \times N_{ref}$ during a time period of $TB \times 2N_{ref}$ generations, after which (at time $TF \times 2N_{ref}$ generations in the past) the population size undergoes an instantaneous change to the contemporary size ($\nu_F \times N_{ref}$).
In [50]:
ar_ery = dill.load(open("OUT_expGrowth_bottleneck/ERY_perturb_ar_ery.dill"))
In [51]:
success = [flatten(out)[:9] for out in ar_ery if out[1][4] == 0]
df = pd.DataFrame(data=success, \
columns=['nuB_0','TB_0', 'nuF_0', 'TF_0', 'nuB_opt', 'TB_opt', 'nuF_opt', 'TF_opt', '-logL'])
df.sort_values(by='-logL', ascending=True)
Out[51]:
Except for the last two, all optimal parameter combinations have the same likelihood and they do not deviate much from their respective initial values. In all successful optimisations above, $\nu_F$, the ratio of contemporary population size to ancient population size, converges to a value below 1/3. I think an ancient size increase or decrease cannot be inferred.
In [52]:
F = df[df['-logL'] < 2172].loc[:,['nuF_opt', 'TF_opt']]
pylab.plot(F['TF_opt']*2*N_ref_ery, F['nuF_opt']*N_ref_ery, 'bo')
pylab.xlabel('generations')
pylab.ylabel('contemporary pop. size')
Out[52]:
There is a clear linear correlation between the inferred contemporary population size ($\nu_F \times N_{ref}$) and the time ($TF$) for which the population has been at this size.
In [155]:
ar_par = dill.load(open("OUT_expGrowth_bottleneck/PAR_perturb_ar_ery.dill"))
In [156]:
success = [flatten(out)[:9] for out in ar_par if out[1][4] == 0]
df = pd.DataFrame(data=success, \
columns=['nuB_0','TB_0', 'nuF_0', 'TF_0', 'nuB_opt', 'TB_opt', 'nuF_opt', 'TF_opt', '-logL'])
df.sort_values(by='-logL', ascending=True)
Out[156]:
Often, but not always, the optimal parameter values are close to their respective initial values. All optimal parameter combinations have the same likelihood. Only a recent (stepwise) population size decrease is consistently inferred ($\nu_F$).
In [161]:
F = df[df['TF_opt'] < 3].loc[:,['nuF_opt', 'TF_opt']]
pylab.plot(F['TF_opt']*2*N_ref_par, F['nuF_opt']*N_ref_par, 'bo')
pylab.xlabel('TF generations')
pylab.ylabel('contemporary pop. size')
Out[161]:
There could be a linear correlation between the duration of population size reduction and the size of the reduction, but it's weaker than for erythropus.
Not much can be inferred from the 1D spectra of either erythropus or parallelus. However, the dadi optimisations have consistently suggested a population size reduction, more than 400,000 generations ago, to less than 1/3 of the ancient population size. The reduction seems to have been much stronger in parallelus than in erythropus.
The fitting of 2D models was substantially easier than fitting 1D models and led to consistent and robust parameter estimates.
Could it be that the inability to model gene flow from an external source is responsible for the issues with fitting 1D models?
I have saved to files the results of each optimisation run (i. e. for each combination of starting parameter values).
In [4]:
# load results of fitting isolation with migration model to 2D spectrum
import dill, glob
split_mig_res = []
for filename in glob.glob("OUT_2D_models/split_mig*dill"):
split_mig_res.append(dill.load(open(filename)))
In [5]:
from utility_functions import *
In [6]:
import pandas as pd
In [8]:
success = [flatten(out)[:9] for out in split_mig_res if out[1][4] == 0]
df = pd.DataFrame(data=success, \
columns=['nu1_0','nu2_0', 'T_0', 'm_0', 'nu1_opt', 'nu2_opt', 'T_opt', 'm_opt', '-logL'])
df.sort_values(by='-logL', ascending=True)
Out[8]:
These are the successful parameter optimisations I have for the model that specifies a split at time $T$ in the past into two daughter populations with population size ratios (relative to $N_{ref}$) of $\nu_1$ for ery and $\nu_2$ for par. $m$ is the migration rate, which is assumed to be symmetrical; it is $2N_{ref}$ times the per-generation fraction of immigrants from the other population, i. e. the number of immigrant alleles per generation.
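As a minimal sketch (the perturbation fold and bounds are illustrative assumptions), one of these optimisation runs could have looked like this, with `sfs2d` being the folded 2D spectrum that is loaded further below:

```python
# minimal sketch of one optimisation run for the built-in split_mig model;
# params are (nu1, nu2, T, m); fold and bounds are illustrative assumptions
func = dadi.Demographics2D.split_mig
func_ex = dadi.Numerics.make_extrap_log_func(func)
p0 = dadi.Misc.perturb_params([1, 1, 1, 1], fold=2)
out = dadi.Inference.optimize_log_fmin(p0, sfs2d, func_ex, [40, 50, 60],
                                       lower_bound=[1e-4, 1e-4, 0, 0],
                                       upper_bound=[1e4, 1e4, 5, 10],
                                       verbose=0, full_output=True)
```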
The best parameter combination that I got from these optimisation runs is:
In [9]:
popt = df.sort_values(by='-logL', ascending=True).iloc[0, 4:8]
popt
Out[9]:
In [10]:
import numpy as np
In [11]:
popt = np.array(popt)
In [12]:
func = dadi.Demographics2D.split_mig # divergence-with-gene-flow model function (built-in)
func_ex = dadi.Numerics.make_extrap_log_func(func)
In [14]:
ns = np.array([36, 36])
pts_l = [40, 50, 60]
In [15]:
# get optimal model spectrum from the 2D 'divergence-with-migration' model
split_mig_2D_best_fit_model_spectrum = func_ex(popt, ns, pts_l)
In [19]:
split_mig_2D_best_fit_model_spectrum
Out[19]:
In [22]:
# import 2D unfolded spectrum
sfs2d_unfolded = dadi.Spectrum.from_file('dadiExercises/EryPar.unfolded.2dsfs.dadi_format')
# add population labels
sfs2d_unfolded.pop_ids = ["ery", "par"]
# fold the spectrum
sfs2d = sfs2d_unfolded.fold()
In [23]:
# need to scale model spectrum by optimal theta, which depends on the number of sites in the data
model_spectrum = dadi.Inference.optimally_scaled_sfs(split_mig_2D_best_fit_model_spectrum, sfs2d)
model_spectrum
Out[23]:
In [25]:
import pylab
%matplotlib inline
pylab.rcParams['figure.figsize'] = [12, 10]
pylab.rcParams['font.size'] = 14
model_spectrum.pop_ids = sfs2d.pop_ids
dadi.Plotting.plot_single_2d_sfs(model_spectrum.fold(), vmin=1, cmap=pylab.cm.jet)
Out[25]:
In [26]:
# get the marginal model spectra for ery and par from the optimal 2D model spectrum
fs_ery_model = model_spectrum.marginalize([1])
fs_par_model = model_spectrum.marginalize([0])
In [27]:
pylab.plot(fs_par_model.fold(), 'go-', label='par')
pylab.plot(fs_ery_model.fold(), 'rs-', label='ery')
pylab.xlabel('minor allele frequency')
pylab.ylabel('SNP count')
pylab.legend()
Out[27]:
In [29]:
# import 1D unfolded spectrum of ery
fs_ery_unfolded = dadi.Spectrum.from_file('ERY.unfolded.sfs')
# import 1D unfolded spectrum of par
fs_par_unfolded = dadi.Spectrum.from_file('PAR.unfolded.sfs')
In [31]:
fs_ery = fs_ery_unfolded.fold()
fs_par = fs_par_unfolded.fold()
In [34]:
dadi.Plotting.plot_1d_comp_multinom(fs_ery_model.fold()[:19], fs_ery[:19])
In [35]:
dadi.Plotting.plot_1d_comp_multinom(fs_par_model.fold()[:19], fs_par[:19])
In [36]:
# log likelihoods
ll_model_ery = dadi.Inference.ll_multinom(fs_ery_model.fold(), fs_ery)
ll_model_ery
Out[36]:
In [37]:
ll_model_par = dadi.Inference.ll_multinom(fs_par_model.fold(), fs_par)
ll_model_par
Out[37]:
model | ERY | PAR |
---|---|---|
exponential growth | 1833.6 | ? |
two epoch | 5502.3 | 5502.3 |
bottlegrowth | 1649.9 | ? |
three epoch | 2168.1 | 6925.6 |
exp growth+bottleneck | 2167.7 | 6929.4 |
divergence-with-migration | 2243.4 | 5605.2 |
The table above shows the negative log likelihoods for the best parameter combination found for five 1D models (see the sections above), as well as the likelihoods of the 1D model spectra derived from a 2D model of divergence with migration.
The simple divergence-with-symmetrical-gene-flow model from which the marginal 1D best-fit model spectra were derived is most similar to the `two_epoch` model among the 1D models used for fitting: the population split coincides with an instantaneous change in population size, which then stays the same until the present. When viewed from one of the two populations, the divergence-with-gene-flow model is therefore identical to the `two_epoch` model with the addition of gene flow from the other diverging population.
For ery, the addition of gene flow provides a dramatic improvement compared to the `two_epoch` model. For par, the fit is even worse. So the lack of gene flow in the 1D models is at best only one of several factors that could explain the problems with fitting 1D models to 1D spectra.
It should be noted that the three 1D models that include two population size changes, `bottlegrowth`, `three_epoch` and `exp growth+bottleneck`, all achieve better fits to the data than the marginal 1D model spectra from the best-fit divergence-with-gene-flow model.