In [1]:
# conda install ipyrad -c ipyrad
# conda install structure clumpp -c ipyrad
# conda install toytree -c eaton-lab
In [2]:
import ipyrad.analysis as ipa
import toyplot
If you include a 'mapfile' then we will use locus information to subsample just a single SNP from each locus so that the resulting data file will meet the expectations of structure that SNPs are "unlinked". If you create multiple replicates files using different random seeds then different SNPs will be selected in each rep.
In [3]:
s = ipa.structure(
name="test",
workdir="analysis-structure",
data="analysis-ipyrad/ped_min10_outfiles/ped_min10.str",
mapfile="analysis-ipyrad/ped_min10_outfiles/ped_min10.snps.map",
)
In [4]:
## set run parameters (you probably want to run >10X this long)
s.mainparams.burnin = 1000
s.mainparams.numreps = 5000
## tell structure to expect popdata & popflag
s.mainparams.popdata = 1
s.mainparams.popflag = 1
## print all mainparams
s.mainparams
Out[4]:
In [5]:
## tell structure to use popinfo
s.extraparams.usepopinfo = 1
## print all other extraparams
s.extraparams
Out[5]:
In [6]:
s.header
Out[6]:
popdata
is the a priori population assignment of an individual to a population. Assignments should be non-zero integers (e.g., 1, 2, 3). Zero is reserved to mean that there is no a priori assignment. popflag
indicates whether or not to use the population assignment in the analysis (1) or to leave it to be inferred (0). So in the example below seven samples have assigned populations (popflag=1), and six samples will have their population assignments inferred (popflag=0). The popdata information will only be used for the seven individuals with assigned pops.
In [7]:
## assign popdata and popflag by entering a list of values
s.popdata = [1, 3, 1, 2, 3, 2, 3, 3, 3, 3, 3, 1, 1]
s.popflag = [1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1]
## print the header information
s.header
Out[7]:
In [8]:
## writes three input files and then call structure yourself
s.write_structure_files(kpop=3)
Out[8]:
In [9]:
## connect to a running ipcluster instance
import ipyparallel as ipp
ipyclient = ipp.Client()
print "connected to {} cores".format(len(ipyclient))
In [10]:
## submit job replicates to ipyclient
s.run(kpop=3, nreps=3, ipyclient=ipyclient)
In [11]:
## wait for jobs to finish
ipyclient.wait()
Out[11]:
In [12]:
## get table of summarized results
table = s.get_clumpp_table(3)
In [13]:
## reorder table by membership in groups
table.sort_values(by=[0, 1, 2], inplace=True)
In [14]:
## build barplot
canvas = toyplot.Canvas(width=500, height=250)
axes = canvas.cartesian(bounds=("10%", "90%", "10%", "45%"))
axes.bars(table)
## add labels to x-axis
ticklabels = [i for i in table.index.tolist()]
axes.x.ticks.locator = toyplot.locator.Explicit(labels=ticklabels)
axes.x.ticks.labels.angle = -60
axes.x.ticks.show = True
axes.x.ticks.labels.offset = 10
axes.x.ticks.labels.style = {"font-size": "12px"}