In [1]:
# conda install -c ipyrad ipyrad structure clumpp bpp
# conda install -c eaton-lab toytree toyplot
# conda install -c bioconda raxml
In [49]:
## run this in a separate terminal to start a cluster with 4 engines:
# ipcluster start --n=4
You should then be able to connect to the engines from your notebook:
In [10]:
## connect to the cluster
import ipyparallel as ipp
ipyclient = ipp.Client()
## print number of engines
print(len(ipyclient), "connected engines")
The code below assembles the example simulated data set from the ipyrad tutorial.
In [ ]:
## import ipyrad
import ipyrad as ip
Minimal workflow: scroll down for details.
In [ ]:
## create an Assembly object
data = ip.Assembly("simdata")
## set I/O paths for the data
data.set_params("project_dir", "~/workshop")
data.set_params("raw_fastq_path", "ipsimdata/rad_example_R1_.fastq.gz")
data.set_params("barcodes_path", "ipsimdata/rad_example_barcodes.txt")
## run all steps of the Assembly
data.run("1234567")
In [3]:
## set params
data.set_params("filter_adapters", 2)
data.set_params("output_formats", "lpask")
## show params
data.get_params()
In [4]:
## run all steps of assembly
data.run("1234567")
In [5]:
## summary stats
data.stats
Out[5]:
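The .stats summary is a pandas DataFrame, so standard pandas methods work on it. A minimal sketch saving the table to a file (the output path is hypothetical):
In [ ]:
## save the summary stats to a tab-separated file
data.stats.to_csv("~/workshop/simdata-stats.csv", sep="\t")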
In [35]:
import toyplot
## plot barplot
c, a, m = toyplot.bars(
    data.stats.hetero_est,
    height=250, width=500,
)
## style the axes
a.x.ticks.locator = toyplot.locator.Explicit(
    locations=range(len(data.stats)),
    labels=data.stats.index)
a.y.label.text = "Heterozygosity"
a.y.ticks.show = True
In [27]:
## the step-2 stats file
print(data.stats_files.s2)
## the .loci output file location
print(data.outfiles.loci)
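Both are plain text files that you can inspect directly. A minimal sketch printing the first locus from the .loci file (in the .loci format each locus ends with a line starting with "//"):
In [ ]:
## print the first locus from the .loci output file
with open(data.outfiles.loci) as infile:
    for line in infile:
        print(line.rstrip())
        if line.startswith("//"):
            break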
In [1]:
## import the toolkit
import ipyrad.analysis as ipa
In [2]:
import ipyrad as ip
import ipyparallel as ipp
data = ip.load_json("/home/deren/workshop/simdata.json")
ipyclient = ipp.Client()
Minimal workflow: scroll down for details.
In [3]:
## create a raxml object
s = ipa.raxml(
    name=data.name,
    phyfile=data.outfiles.phy,
    workdir="~/workshop/analysis-raxml");
## run the analysis
s.run()
In [4]:
## modify params
s.params.T = 4
s.params.N = 100
In [5]:
## print the raxml command as a string
print(s.command)
In [6]:
## overwrite existing result with this 'name'
s.run(force=True)
In [7]:
print(s.trees)
In [8]:
import toytree
tre = toytree.tree(s.trees.bipartitions)
tre.root(wildcard='3')
tre.draw(
    width=300,
    node_labels=tre.get_node_values("support"),
    node_size=20,
);
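Like other toyplot-based figures, the tree drawing can be saved to a file. A minimal sketch, assuming draw() returns a (canvas, axes) tuple as in the toytree version used here (the output path is hypothetical):
In [ ]:
import os
import toyplot.pdf
## render the canvas returned by draw() to a PDF
canvas, axes = tre.draw(width=300, node_size=20)
toyplot.pdf.render(canvas, os.path.expanduser("~/workshop/raxml-tree.pdf"))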
Minimal example, scroll down for details.
In [9]:
## create a baba object
b = ipa.baba(data=data.outfiles.loci)
## generate tests given the rooted tree
b.tests = [
    {"p4": ["3L_0"],
     "p3": ["2F_0"],
     "p2": ["1D_0"],
     "p1": ["1A_0"]}]
## run jobs distributed across the cluster
b.run(ipyclient)
b.results_table
Out[9]:
In [10]:
## init baba object
b = ipa.baba(data=data.outfiles.loci, newick=tre)
## generate all possible tests on this tree
b.generate_tests_from_tree()
## set constraints on tests
cdict = {"p4": ["3L_0"],
         "p3": ["2E_0", "2F_0"],
         "p2": ["1D_0"]}
## generate a constrained set of tests
b.generate_tests_from_tree(
    constraint_dict=cdict,
    constraint_exact=False,
)
In [11]:
## run the tests (in this case 4) linked to the baba object
b.run(ipyclient)
## show results table
b.results_table
Out[11]:
In [12]:
b.plot(
    height=350,
    pct_tree_x=0.4,
    pct_tree_y=0.2,
);
In [15]:
## save the plot
import toyplot.pdf
canvas, axes, mark = b.plot(height=350, pct_tree_x=0.4, pct_tree_y=0.2)
toyplot.pdf.render(canvas, "/home/deren/workshop/abba-baba.pdf")
## save the results table
b.results_table.to_csv("~/workshop/abba-baba.csv", sep="\t")
In [16]:
## create a tetrad class object
tet = ipa.tetrad(
    name=data.name,
    seqfile=data.outfiles.snpsphy,
    mapfile=data.outfiles.snpsmap,
    workdir="~/workshop/analysis-tetrad",
    nboots=100,
)
## run the analysis
tet.run(ipyclient)
In [17]:
tet.trees
Out[17]:
In [63]:
## load unrooted result tree with toytree and draw
tre = toytree.tree(tet.trees.cons)
tre.draw(
    node_labels=tre.get_node_values("support"),
    node_size=20,
);
In [ ]:
# conda install bpp -c ipyrad
In [19]:
## setup: define how samples group into 'species'
IMAP = {
"1": ["1A_0", "1B_0", "1C_0"],
"D": ["1D_0"],
"2": ["2F_0", "2E_0", "2G_0"],
"H": ["2H_0"],
"3": ["3J_0", "3I_0", "3K_0"],
"L": ["3L_0"],
}
## setup: define a guidetree
GUIDE = "(((1,D),(2,H)),(3,L));"
## init a bpp object
bpp = ipa.bpp(
    locifile=data.outfiles.loci,
    imap=IMAP,
    guidetree=GUIDE,
    workdir="~/workshop/analysis-bpp",
);
## submit jobs to run on the cluster
bpp.submit_bpp_jobs("A00", nreps=2, ipyclient=ipyclient)
In [20]:
## set some parameters
bpp.params.burnin = 1000
bpp.params.nsample = 5000
bpp.params.infer_sptree = 1
bpp.params.infer_delimit = 0
## set some filters
bpp.filters.maxloci = 200
bpp.filters.minsnps = 2
## submit jobs to run on the cluster
bpp.submit_bpp_jobs("A00", nreps=2, ipyclient=ipyclient)
Unlike some of the other ipyrad.analysis tools, the bpp object does not "block" while jobs are running: after it sends jobs to the cluster you can continue to interact with the notebook. This is useful because BPP is not multi-threaded, so you will likely want to submit many different kinds of jobs. You can check on running jobs as shown below.
In [21]:
## a list of submitted jobs
print(bpp.asyncs)
## a list of result files produced by the jobs
print(bpp.files)
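If you want the notebook to pause until all submitted jobs are done, you can poll the job objects. A minimal sketch, assuming the entries in bpp.asyncs are ipyparallel AsyncResult objects:
In [ ]:
import time
## block until every submitted bpp job has finished
while not all([job.ready() for job in bpp.asyncs]):
    time.sleep(5)
print("all bpp jobs finished")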
In [1]:
import ipyrad as ip
import ipyrad.analysis as ipa
import ipyparallel as ipp
data = ip.load_json("/home/deren/workshop/simdata.json")
ipyclient = ipp.Client()
In [2]:
# conda install structure -c ipyrad
# conda install clumpp -c ipyrad
In [3]:
## create a structure class object
s = ipa.structure(
    name=data.name,
    strfile=data.outfiles.str,
    mapfile=data.outfiles.snpsmap,
    workdir="~/workshop/analysis-structure",
);
s.mainparams.burnin = 100
s.mainparams.numreps = 1000
## submit jobs to run on the cluster
for kpop in [2, 3, 4, 5]:
    s.submit_structure_jobs(kpop=kpop, nreps=5, ipyclient=ipyclient)
In [5]:
s.mainparams.burnin = 10000
s.mainparams.numreps = 100000
s.extraparams.usepopinfo = 0
In [6]:
## get results for a single K value
s.get_clumpp_table(3)
## make a dict for all results
tables = {}
for kpop in [2, 3, 4, 5]:
    tables[kpop] = s.get_clumpp_table(kpop)
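The CLUMPP tables can be drawn as stacked barplots with toyplot. A minimal sketch, assuming each table is a DataFrame with samples in rows and cluster ancestry proportions in columns:
In [ ]:
import toyplot
## stacked barplot of admixture proportions for K=3
table = tables[3]
canvas = toyplot.Canvas(width=500, height=250)
axes = canvas.cartesian(ylabel="ancestry proportion")
axes.bars(table.values)
## label x-axis ticks with sample names
axes.x.ticks.locator = toyplot.locator.Explicit(
    locations=range(len(table)),
    labels=table.index)
axes.x.ticks.show = True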