The program TreeMix by Pickrell & Pritchard (2012) is used to infer population splits and admixture from allele frequency data. From the TreeMix documentation: "In the underlying model, the modern-day populations in a species are related to a common ancestor via a graph of ancestral populations. We use the allele frequencies in the modern populations to infer the structure of this graph."
In [1]:
## conda install -c ipyrad ipyrad
## conda install -c ipyrad treemix
## conda install -c eaton-lab toytree
In [2]:
import ipyrad.analysis as ipa
import toytree
import toyplot
import numpy as np
In [3]:
print 'ipyrad', ipa.__version__
print 'toytree', toytree.__version__
print 'toyplot', toyplot.__version__
! treemix --version | grep 'TreeMix v. '
In [4]:
## a dictionary mapping sample names to 'species' names
imap = {
"prz": ["32082_przewalskii", "33588_przewalskii"],
"cys": ["41478_cyathophylloides", "41954_cyathophylloides"],
"cya": ["30686_cyathophylla"],
"sup": ["29154_superba"],
"cup": ["33413_thamno"],
"tha": ["30556_thamno"],
"rck": ["35236_rex"],
"rex": ["35855_rex", "40578_rex"],
"lip": ["39618_rex", "38362_rex"],
}
## optional: loci will be filtered if they do not have data for at
## least N samples in each species. Minimums cannot be <1.
minmap = {
"prz": 2,
"cys": 2,
"cya": 1,
"sup": 1,
"cup": 1,
"tha": 1,
"rck": 1,
"rex": 2,
"lip": 2,
}
In [5]:
t = ipa.treemix(
name="test",
data="analysis-ipyrad/pedicularis_outfiles/pedicularis.snps.phy",
imap=imap,
minmap=minmap,
)
In [6]:
## you can set additional parameter args here
t.params.m = 1
t.params.root = "prz"
t.params.global_ = 1
#t.params.se = 1
t.params
Out[6]:
In [7]:
## write treemix input files so you can call treemix from the command line
s = t.write_output_file()
This shows the command string that corresponds to the parameter settings in the Treemix object. You can see that the input file (-i) is the string we enetered in the data field above, and the output prefix (-o) corresponds to the default working directory and the name field that we provided above. In addition, the argument (-m 1) is added because we added that to the params dictionary.
In [8]:
## the command string
print t.command
In [9]:
## you can run the command in a notebook by using bash one-liners (!)
! $t.command > analysis-treemix/treemix.log
Alternatively, you can use the .run()
command of the treemix object to run treemix. This is more convenient because the results will automatically be parsed by the treemix object so that they are easily accessible for downstream plotting. In the loop below we run treemix over a range of migration parameters (-m) and with 4 replicates per setting.
In [10]:
## a dictionary for storing treemix objects
tdict = {}
## iterate over values of m
for rep in xrange(4):
for mig in xrange(4):
## create new treemix object copy
name = "mig-{}-rep-{}".format(mig, rep)
tmp = t.copy(name)
## set params on new object
tmp.params.m = mig
## run treemix analysis
tmp.run()
## store the treemix object
tdict[name] = tmp
In [11]:
## choose a treemix object from the above analysis
t = tdict['mig-2-rep-3']
In [12]:
## access output files produced by treemix
t.files
Out[12]:
In [13]:
## access the newick string representation of the tree
t.results.tree
Out[13]:
In [14]:
## access a list of admixture edges
t.results.admixture
Out[14]:
In [15]:
## access the covariance matrix
t.results.modelcov
Out[15]:
In [16]:
def _get_admix_point(tre, idx, dist):
## parent coordinates
px, py = tre.verts[idx]
## child coordinates
cx, cy = tre.verts[tre.tree.search_nodes(idx=idx)[0].up.idx]
## angle of hypotenuse
theta = np.arctan((px-cx) / (py-cy))
## new coords along the hypot angle
horz = np.sin(theta) * dist
vert = np.cos(theta) * dist
## change x
a = tre.verts[idx, 0]
b = tre.verts[idx, 1]
a -= abs(horz)
if py < cy:
b += abs(vert)
else:
b -= abs(vert)
return a, b
In [17]:
def treemix_plot(tmp, axes):
## create a toytree object from the treemix tree result
tre = toytree.tree(newick=tmp.results.tree)
tre.draw(
axes=axes,
use_edge_lengths=True,
tree_style='c',
tip_labels_align=True,
edge_align_style={"stroke-width": 1}
);
## get coords
for admix in tmp.results.admixture:
## parse admix event
pidx, pdist, cidx, cdist, weight = admix
a = _get_admix_point(tre, pidx, pdist)
b = _get_admix_point(tre, cidx, cdist)
## add line for admixture edge
mark = axes.plot(
a = (a[0], b[0]),
b = (a[1], b[1]),
style={"stroke-width": 10*weight,
"stroke-opacity": 0.95,
"stroke-linecap": "round"}
)
## add points at admixture sink
axes.scatterplot(
a = (b[0]),
b = (b[1]),
size=8,
title="weight: {}".format(weight),
)
## add scale bar for edge lengths
axes.y.show=False
axes.x.ticks.show=True
axes.x.label.text = "Drift parameter"
return axes
In [18]:
toyplot.__version__
Out[18]:
In [24]:
## select a result
tmp = tdict["mig-1-rep-0"]
## draw the tree similar to the Treemix plotting R code
canvas = toyplot.Canvas(width=350, height=350)
axes = canvas.cartesian(padding=25, margin=75)
axes = treemix_plot(tmp, axes)
In [25]:
canvas = toyplot.Canvas(width=800, height=1200)
idx = 0
for mig in range(4):
for rep in range(4):
tmp = tdict["mig-{}-rep-{}".format(mig, rep)]
ax = canvas.cartesian(grid=(4, 4, idx), padding=25, margin=(25, 50, 100, 25))
ax = treemix_plot(tmp, ax)
idx += 1
In [26]:
## grab names from the tree
tre = toytree.tree(tmp.results.tree)
lnames = toyplot.locator.Explicit(
locations=range(len(tre.get_tip_labels())),
labels=tre.get_tip_labels(),
)
## get a colormap and plot the matrix
cmap = toyplot.color.diverging.map("BlueRed", tmp.results.cov.min(), tmp.results.cov.max())
canvas, table = toyplot.matrix(
(tmp.results.cov, cmap),
width=400,
height=400,
bshow=True,
tshow=False,
llocator=lnames,
blocator=lnames,
);
## add a color scale
tlocs = np.linspace(tmp.results.cov.min(), tmp.results.cov.max(), 5)
tlabs = ["{:.2f}".format(i) for i in tlocs]
canvas.color_scale(cmap,
x1=100, x2=300, y1=50, y2=50,
ticklocator=toyplot.locator.Explicit(
locations=tlocs,
labels=tlabs,
));