Key features:
In [1]:
# conda install ipyrad -c bioconda
# conda install toyplot -c eaton-lab (optional)
In [2]:
import ipyrad.analysis as ipa
import toyplot
In [3]:
# SNP input for the distance tool: a VCF or HDF5 formatted snps file.
# NOTE(review): this is an absolute path on the original author's machine;
# point it at your own file before running.
data = "/home/deren/Downloads/ref_pop2.snps.hdf5"
In [4]:
# population assignment: group label -> list of sample names in that group
imap = dict(
    virg=["TXWV2", "LALC2", "SCCU3", "FLSF33", "FLBA140"],
    mini=["FLSF47", "FLMO62", "FLSA185", "FLCK216"],
    gemi=["FLCK18", "FLSF54", "FLWO6", "FLAB109"],
    bran=["BJSL25", "BJSB3", "BJVL19"],
    fusi=["MXED8", "MXGT4", "TXGR3", "TXMD3"],
    sagr=["CUVN10", "CUCA4", "CUSV6", "CUMM5"],
    oleo=["CRL0030", "CRL0001", "HNDA09", "BZBB1", "MXSA3017"],
)

# per-group minimum sample coverage a SNP must have to be kept; every group
# gets the same threshold (0.5 is presumably read as a proportion of the
# group's samples rather than a count -- confirm against the ipyrad docs)
minmap = dict.fromkeys(imap, 0.5)
In [5]:
# build the distance tool from the SNP file and the population grouping,
# then compute the pairwise distances
from ipyrad.analysis.distance import Distance

dist = Distance(
    data=data,
    imap=imap,
    minmap=minmap,
    mincov=0.5,
    subsample_snps=False,
    impute_method="sample",
)

# NOTE(review): impute_method="sample" looks like a stochastic imputation and
# no random seed is passed here -- confirm whether repeated runs give
# identical distances.
dist.run()
In [6]:
# write the distance table to "distances.csv" (relative to the working directory)
dist.dists.to_csv(path_or_buf="distances.csv")
In [7]:
# preview the first five rows of the distance table
dist.dists.head(n=5)
Out[7]:
In [8]:
# Draw the distance table as a matrix plot with the top and bottom labels
# turned off (tshow/bshow) and the sample names supplied through rlocator.
#
# The labels are taken from the table's own row index rather than from
# sorted(dist.names): the rows are drawn in dist.dists order, so labelling
# from the index guarantees each label sits next to the row it names even
# if the names are not stored in sorted order.
toyplot.matrix(
    dist.dists,
    bshow=False,
    tshow=False,
    rlocator=toyplot.locator.Explicit(
        range(len(dist.dists.index)),
        list(dist.dists.index),
    ),
);
In [9]:
# flatten the per-group sample lists into one list, keeping the group order
# of dist.imap so that members of the same group end up adjacent
ordered_names = [name for members in dist.imap.values() for name in members]

# reorder the table to that name order on both axes: pick the columns,
# transpose, then pick the columns again (the former rows)
ordered_matrix = dist.dists.loc[:, ordered_names].T.loc[:, ordered_names]
In [10]:
# Same matrix plot as before, but for the group-ordered table; labels come
# from ordered_names, the list the table was reordered with.
toyplot.matrix(
    ordered_matrix,
    tshow=False,
    bshow=False,
    rlocator=toyplot.locator.Explicit(
        range(len(ordered_names)),
        ordered_names,
    ),
);