The program construct is a STRUCTURE-like tool that incorporates expectations of isolation by distance. It is available as an R package. This notebook demonstrates how to convert data to the proper format for use in construct using simulated data as an example. For details on running construct see their documentation.
In [1]:
# conda install ipyrad ipcoal -c conda-forge -c bioconda
In [3]:
import ipyrad.analysis as ipa
import toytree
import ipcoal
In [22]:
print('ipyrad', ipa.__version__)
print('toytree', toytree.__version__)
print('ipcoal', ipcoal.__version__)
In [5]:
# network model
tree = toytree.rtree.unittree(7, treeheight=3e6, seed=123)
tree.draw(ts='o', admixture_edges=(3, 2));
In [15]:
# simulation model with admixture and missing data
model = ipcoal.Model(tree, Ne=1e4, nsamples=4, admixture_edges=(3, 2, 0.5, 0.1))
model.sim_snps(250)
model.write_snps_to_hdf5(name="test-construct", outdir="/tmp", diploid=True)
In [16]:
# the path to your HDF5 formatted snps file
SNPS = "/tmp/test-construct.snps.hdf5"
In [17]:
IMAP = {
"r0": ["r0-0", "r0-1"],
"r1": ["r1-0", "r1-1"],
"r2": ["r2-0", "r2-1"],
"r3": ["r3-0", "r3-1"],
"r4": ["r4-0", "r4-1"],
"r5": ["r5-0", "r5-1"],
"r6": ["r6-0", "r6-1"],
}
In [18]:
# apply filtering to the SNPs file
tool = ipa.snps_extracter(data=SNPS, imap=IMAP, minmap={i:2 for i in IMAP})
tool.parse_genos_from_hdf5();
In [19]:
# convert SNP data to genotype frequencies
df = tool.get_population_geno_frequency()
df.head()
Out[19]:
In [20]:
# write to a file
df.to_csv("/tmp/freqs.csv")