In [1]:
### Notebook 1
### Data set 1 (Viburnum)
### Language: Bash
### Data Location: NCBI SRA PRJNA299402 & PRJNA299407
In [3]:
%%bash
## make a new directory for this analysis
mkdir -p empirical_1/
mkdir -p empirical_1/halfrun
mkdir -p empirical_1/fullrun
In [87]:
## import Python libraries
import pandas as pd
import numpy as np
import ipyparallel
import urllib2
import glob
import os
Sequence data for this study are archived on the NCBI Sequence Read Archive (SRA). The data were generated in two separate Illumina runs but are combined under a single project ID number.
The library contains 95 samples. We uploaded the two demultiplexed samples for each individual separately, so each sample has two files. Below we examine just the first library (the "half" data set) and then both libraries combined (the "full" data set). We analyze 64 samples, since the remaining samples are replicate individuals within species that are part of a separate project.
You can download the data set using the script below:
In [ ]:
%%bash
## get the data from the NCBI SRA
for run in $(seq 24 28);
do
wget -q -r -nH --cut-dirs=9 \
ftp://ftp-trace.ncbi.nlm.nih.gov/\
sra/sra-instant/reads/ByRun/sra/SRR/\
SRR191/SRR19155$run/SRR19155$run".sra";
done
In [ ]:
%%bash
## convert sra files to fastq using fastq-dump tool
fastq-dump *.sra
In [ ]:
## IPython code
## This reads in a table mapping sample names to SRA numbers
## that is hosted on github
## open table from github url
url = "https://raw.githubusercontent.com/"+\
"dereneaton/virentes/master/SraRunTable.txt"
intable = urllib2.urlopen(url)
## make name xfer dictionary
DF = pd.read_table(intable, sep="\t")
D = {DF.Run_s[i]:DF.Library_Name_s[i] for i in DF.index}
## rename files to their sample (library) names and move them to the fastq dir/
for fname in glob.glob("*.fastq"):
    os.rename(fname, "analysis_pyrad/fastq/"+\
              D[fname.replace(".fastq", "")]+".fq")
In [1]:
%%bash
mkdir -p fastq_combined
In [26]:
## IPython code that makes a bash call w/ (!)
## get all the data from the two libraries and concatenate it
lib1tax = glob.glob("/home/deren/Documents/Vib_Lib1/fastq_Lib1/*.gz")
lib2tax = glob.glob("/home/deren/Documents/Vib_Lib1/fastq_Lib2/*.gz")
## names had to be modified to match
taxa = [i.split("/")[-1].split("_", 1)[1] for i in lib1tax]
for tax in taxa:
    ! cat /home/deren/Documents/Vib_Lib1/fastq_Lib1/Lib1_$tax \
          /home/deren/Documents/Vib_Lib1/fastq_Lib2/Lib2_$tax \
          > /home/deren/Documents/Vib_Lib1/fastq_combined/$tax
In [4]:
%%bash
pyrad --version
In [5]:
%%bash
## create a new default params file
rm -f params.txt
pyrad -n
In [156]:
%%bash
## substitute new parameters into file
sed -i '/## 1. /c\empirical_1/halfrun ## 1. working directory ' params.txt
sed -i '/## 6. /c\TGCAG ## 6. cutters ' params.txt
sed -i '/## 7. /c\30 ## 7. N processors ' params.txt
sed -i '/## 9. /c\6 ## 9. NQual ' params.txt
sed -i '/## 10./c\.85 ## 10. clust threshold ' params.txt
sed -i '/## 12./c\4 ## 12. MinCov ' params.txt
sed -i '/## 13./c\10 ## 13. maxSH ' params.txt
sed -i '/## 14./c\empirical_1_half_m4 ## 14. output name ' params.txt
sed -i '/## 18./c\/home/deren/Documents/Vib_Lib1/fastq_Lib1/*.fastq ## 18. data location ' params.txt
sed -i '/## 29./c\2,2 ## 29. trim overhang ' params.txt
sed -i '/## 30./c\p,n,s,a ## 30. output formats ' params.txt
In [157]:
cat params.txt
In [ ]:
%%bash
pyrad -p params.txt -s 234567 >> log.txt 2>&1
In [53]:
%%bash
sed -i '/## 12./c\2 ## 12. MinCov ' params.txt
sed -i '/## 14./c\empirical_1_half_m2 ## 14. output name ' params.txt
In [35]:
%%bash
pyrad -p params.txt -s 7 >> log.txt 2>&1
In [141]:
%%bash
## substitute new parameters into file
sed -i '/## 1. /c\empirical_1/fullrun ## 1. working directory ' params.txt
sed -i '/## 6. /c\TGCAG ## 6. cutters ' params.txt
sed -i '/## 7. /c\30 ## 7. N processors ' params.txt
sed -i '/## 9. /c\6 ## 9. NQual ' params.txt
sed -i '/## 10./c\.85 ## 10. clust threshold ' params.txt
sed -i '/## 12./c\4 ## 12. MinCov ' params.txt
sed -i '/## 13./c\10 ## 13. maxSH ' params.txt
sed -i '/## 14./c\empirical_1_full_m4 ## 14. output name ' params.txt
sed -i '/## 18./c\/home/deren/Documents/Vib_Lib1/fastq_combined/*.fastq ## 18. data location ' params.txt
sed -i '/## 29./c\2,2 ## 29. trim overhang ' params.txt
sed -i '/## 30./c\p,n,s,a ## 30. output formats ' params.txt
In [ ]:
%%bash
pyrad -p params.txt -s 234567 >> log.txt 2>&1
In [49]:
%%bash
sed -i '/## 12./c\2 ## 12. MinCov ' params.txt
sed -i '/## 14./c\empirical_1_full_m2 ## 14. output name ' params.txt
In [142]:
%%bash
pyrad -p params.txt -s 7 >> log.txt 2>&1
In [3]:
## read in the data
s2dat = pd.read_table("empirical_1/halfrun/stats/s2.rawedit.txt", header=0, nrows=66)
## print summary stats
print s2dat["passed.total"].describe()
## find which sample has the most raw data
maxraw = s2dat["passed.total"].max()
print "\nmost raw data in sample:"
print s2dat['sample '][s2dat['passed.total']==maxraw]
In [4]:
## read in the data
s2dat = pd.read_table("empirical_1/fullrun/stats/s2.rawedit.txt", header=0, nrows=66)
## print summary stats
print s2dat["passed.total"].describe()
## find which sample has the most raw data
maxraw = s2dat["passed.total"].max()
print "\nmost raw data in sample:"
print s2dat['sample '][s2dat['passed.total']==maxraw]
In [33]:
## read in the s3 results
s3dat = pd.read_table("empirical_1/halfrun/stats/s3.clusters.txt", header=0, nrows=66)
## print summary stats
print "summary of means\n=================="
print s3dat['dpt.me'].describe()
## print summary stats
print "\nsummary of std\n=================="
print s3dat['dpt.sd'].describe()
## print summary stats
print "\nsummary of proportion lowdepth\n=================="
print pd.Series(1-s3dat['d>5.tot']/s3dat["total"]).describe()
## find which sample has the greatest depth of retained loci
maxdepth = s3dat["d>5.tot"].max()
print "\nhighest coverage in sample:"
print s3dat['taxa'][s3dat['d>5.tot']==maxdepth]
In [10]:
## read in the s3 results
s3dat = pd.read_table("empirical_1/fullrun/stats/s3.clusters.txt", header=0, nrows=66)
## print summary stats
print "summary of means\n=================="
print s3dat['dpt.me'].describe()
## print summary stats
print "\nsummary of std\n=================="
print s3dat['dpt.sd'].describe()
## print summary stats
print "\nsummary of proportion hidepth\n=================="
print pd.Series(1-s3dat['d>5.tot']/s3dat["total"]).describe()
## find which sample has the greatest depth of retained loci
max_hiprop = (s3dat["d>5.tot"]/s3dat["total"]).max()
print "\nhighest coverage in sample:"
print s3dat['taxa'][s3dat['d>5.tot']/s3dat["total"]==max_hiprop]
In [13]:
## print mean and std of coverage for the highest coverage sample
with open("empirical_1/fullrun/clust.85/lantanoides_D15_Beartown_2.depths", 'rb') as indat:
depths = np.array(indat.read().strip().split(","), dtype=int)
print depths.mean(), depths.std()
In [16]:
import toyplot
import toyplot.svg
import numpy as np
## read in the depth information for this sample
with open("empirical_1/fullrun/clust.85/lantanoides_D15_Beartown_2.depths", 'rb') as indat:
depths = np.array(indat.read().strip().split(","), dtype=int)
## make a barplot in Toyplot
canvas = toyplot.Canvas(width=350, height=300)
axes = canvas.axes(xlabel="Depth of coverage (N reads)",
                   ylabel="N loci",
                   label="dataset1/sample=lantanoides_D15_Beartown_2")
## select the loci with depth > 5 (kept)
keeps = depths[depths>5]
## plot kept and discarded loci
edat = np.histogram(depths, range(30)) # density=True)
kdat = np.histogram(keeps, range(30)) #, density=True)
axes.bars(edat)
axes.bars(kdat)
#toyplot.svg.render(canvas, "empirical_1_full_depthplot.svg")
Out[16]:
In [83]:
cat empirical_1/halfrun/stats/empirical_1_half_m4.stats
In [51]:
import numpy as np
indat = open("empirical_1/halfrun/stats/empirical_1_half_m4.stats").readlines()
counts = [int(i.strip().split("\t")[1]) for i in indat[8:73]]
print np.mean(counts)
print np.std(counts)
In [47]:
import numpy as np
import itertools
indat = open("empirical_1/halfrun/stats/empirical_1_half_m4.stats").readlines()
counts = [i.strip().split("\t") for i in indat[81:142]]
#print counts
ntax = [int(i[0]) for i in counts]
ncounts = [int(i[1]) for i in counts]
tots = list(itertools.chain(*[[i]*n for i,n in zip(ntax, ncounts)]))
print np.mean(tots)
print np.std(tots)
In [85]:
cat empirical_1/fullrun/stats/empirical_1_full_m4.stats
In [49]:
import numpy as np
indat = open("empirical_1/fullrun/stats/empirical_1_full_m4.stats").readlines()
counts = [int(i.strip().split("\t")[1]) for i in indat[8:73]]
print np.mean(counts)
print np.std(counts)
In [50]:
import numpy as np
import itertools
indat = open("empirical_1/fullrun/stats/empirical_1_full_m4.stats").readlines()
counts = [i.strip().split("\t") for i in indat[81:140]]
#print counts
ntax = [int(i[0]) for i in counts]
ncounts = [int(i[1]) for i in counts]
tots = list(itertools.chain(*[[i]*n for i,n in zip(ntax, ncounts)]))
print np.mean(tots)
print np.std(tots)
In [ ]:
%%bash
## run raxml on the halfrun data set
raxmlHPC-PTHREADS-AVX -f a -m GTRGAMMA -N 100 -x 12345 -p 12345 -T 35 \
-w /home/deren/Documents/RADmissing/empirical_1/halfrun \
-n empirical_1_halfrun -s empirical_1/halfrun/outfiles/empirical_1_half_m4.phy \
-o "Lib1_clemensiae_DRY6_PWS_2135"
In [ ]:
%%bash
raxmlHPC-PTHREADS-AVX -f a -m GTRGAMMA -N 100 -x 12345 -p 12345 -T 35 \
-w /home/deren/Documents/RADmissing/empirical_1/fullrun \
-n empirical_1_fullrun -s empirical_1/fullrun/outfiles/empirical_1_full_m4.phy \
-o "clemensiae_DRY6_PWS_2135"
In [86]:
%%bash
head -n 20 empirical_1/halfrun/RAxML_info.empirical_1_half_m4
In [88]:
%%bash
head -n 20 empirical_1/fullrun/RAxML_info.empirical_1_full_m4
In [1]:
%load_ext rpy2.ipython
In [132]:
%%R -w 600 -h 1000
library(ape)
tre_half <- read.tree("empirical_1/halfrun/RAxML_bipartitions.empirical_1_halfrun")
#rtre <- root(tre, "Lib1_clemensiae_DRY6_PWS_2135", resolve.root=T)
#rtre <- root(rtre, "Lib1_clemensiae_DRY6_PWS_2135", resolve.root=T)
ltre_half <- ladderize(tre_half)
plot(ltre_half, cex=0.8, edge.width=2)
nodelabels(ltre_half$node.label)
In [6]:
%%R -w 600 -h 1000
library(ape)
svg("outtree.svg", height=11, width=8)
tre_full <- read.tree("empirical_1/fullrun/RAxML_bipartitions.empirical_1_fullrun")
#rtre <- root(tre, "Lib1_clemensiae_DRY6_PWS_2135", resolve.root=T)
#rtre <- root(rtre, "Lib1_clemensiae_DRY6_PWS_2135", resolve.root=T)
ltre_full <- ladderize(tre_full)
plot(ltre_full, cex=0.8, edge.width=3)
#nodelabels(ltre_full$node.label)
dev.off()
The functions nexmake and subsample are used to split the .loci file into individual nexus files, one per locus, within a new directory. Each nexus file is given a mrbayes command block to run. We then run the BUCKy tool mbsum to summarize the mrbayes output, and finally run bucky to infer concordance trees from the posterior distributions of trees across all loci.
Loci are selected on the basis that they have coverage across all tips of the selected subtree and that they contain at least one SNP.
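As a quick illustration of that selection criterion, here is a minimal sketch with a made-up toy locus (the taxon names and sequences are hypothetical, not data from this study): a locus is kept only if every required tip has a sequence and at least one site is variable.
from collections import OrderedDict

## toy locus as {taxon: sequence}; names and sequences are made up for illustration
toylocus = OrderedDict([("taxA", "ACGTA"),
                        ("taxB", "ACGTA"),
                        ("taxC", "ACGAA"),
                        ("taxD", "ACGAA")])
required = ["taxA", "taxB", "taxC", "taxD"]

## (1) coverage: every required tip must have data at this locus
has_all_tips = all(tax in toylocus for tax in required)

## (2) variation: at least one column with more than one base after dropping N/-
def has_snp(seqs):
    for col in zip(*seqs):
        bases = set(b for b in col if b not in "N-")
        if len(bases) > 1:
            return True
    return False

print(has_all_tips and has_snp(list(toylocus.values())))   ## True for this toy locus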
In [93]:
def nexmake(taxadict, loc, nexdir, trim):
    outloc = open(nexdir+"/"+str(loc)+".nex", 'w')
    header = """
#NEXUS
begin data;
dimensions ntax={} nchar={};
format datatype=dna interleave=yes missing=N gap=-;
matrix
""".format(len(taxadict), len(taxadict.values()[0]))
    outloc.write(header)

    ## write one padded name + sequence line per taxon
    for tax, seq in taxadict.items():
        outloc.write("{}{}{}\n"\
                     .format(tax[trim:trim+9],
                             " "*(10-len(tax[0:9])),
                             "".join(seq)))
    mbstring = """
;
end;
begin mrbayes;
set autoclose=yes nowarn=yes;
lset nst=6 rates=gamma;
mcmc ngen=2200000 samplefreq=2000;
sump burnin=200000;
sumt burnin=200000;
end;
"""
    outloc.write(mbstring)
    outloc.close()
In [102]:
from collections import Counter

def unstruct(amb):
    """ returns the two bases encoded by an ambiguity code """
    D = {"R":["G","A"],
         "K":["G","T"],
         "S":["G","C"],
         "Y":["T","C"],
         "W":["T","A"],
         "M":["C","A"]}
    if amb in D:
        return D.get(amb)
    else:
        return [amb, amb]


def resolveambig(subseq):
    """ randomly resolves each ambiguous base to one of its two alleles """
    N = []
    for col in subseq:
        N.append([unstruct(i)[np.random.binomial(1, 0.5)] for i in col])
    return np.array(N)


def newPIS(seqsamp):
    """ returns the number of parsimony-informative sites, or 0 if fewer than 2 """
    counts = [Counter(col) for col in seqsamp.T if not ("-" in col or "N" in col)]
    pis = [i.most_common(2)[1][1] > 1 for i in counts if len(i.most_common(2)) > 1]
    if sum(pis) >= 2:
        return sum(pis)
    else:
        return 0
In [103]:
def parseloci(iloci, taxadict, nexdir, trim=0):
    nloc = 0
    ## create subsampled data set
    for loc in iloci:
        ## get the sample names present in this locus
        names = [line.split()[0] for line in loc.split("\n")[:-1]]
        ## check that the locus has data for all required samples in this subtree
        if all([i in names for i in taxadict.values()]):
            seqs = np.array([list(line.split()[1]) for line in loc.split("\n")[:-1]])
            seqsamp = seqs[[names.index(tax) for tax in taxadict.values()]]
            seqsamp = resolveambig(seqsamp)
            pis = newPIS(seqsamp)
            if pis:
                nloc += 1
                ## remove columns with missing data (N or -) given this subsampling
                keep = []
                for n, col in enumerate(seqsamp.T):
                    if all([i not in ["N", "-"] for i in col]):
                        keep.append(n)
                subseq = seqsamp.T[keep].T
                ## write to a nexus file
                nexdict = dict(zip(taxadict.keys(), [i.tostring() for i in subseq]))
                nexmake(nexdict, nloc, nexdir, trim)
    print nloc, 'loci kept'
In [117]:
def getloci(locifile):
    ## parse the loci file by newline characters
    locifile = open(locifile)
    lines = locifile.readlines()

    ## add "|" back to the end of lines that contain "|"
    for idx in range(len(lines)):
        if "|" in lines[idx]:
            lines[idx] = lines[idx].strip()+"|\n"

    ## join lines back together into one large string
    locistr = "".join(lines)

    ## break the string into loci at the "|\n" character
    loci = locistr.split("|\n")[:-1]

    ## how many loci?
    print len(loci), "loci"
    return loci
## run on both files
loci_full = getloci("empirical_1/fullrun/outfiles/empirical_1_full_m4.loci")
loci_half = getloci("empirical_1/halfrun/outfiles/empirical_1_half_m4.loci")
In [118]:
parseloci(loci_full[:], deep_dict_f, "deep_dict_full", 0)
parseloci(loci_half[:], deep_dict_h, "deep_dict_half", 0)
#parseloci(loci[:], shallow_dict, "shallow_dict", 0)
In [119]:
## create a parallel client
ipclient = ipyparallel.Client()
lview = ipclient.load_balanced_view()
## call function across all engines
def mrbayes(infile):
    import subprocess
    cmd = "mb %s" % infile
    subprocess.check_call(cmd, shell=True)
In [120]:
## submit all nexus files to run mb
allnex = glob.glob("deep_dict_full/*.nex")
for nex in allnex:
    lview.apply(mrbayes, nex)
ipclient.wait_interactive()
In [126]:
def mbsum(nexdir, nloci):
    import subprocess
    ## combine trees from the two replicate runs
    for n in range(1, nloci+1):
        cmd = "mbsum -n 101 -o {}{}.in {}{}.nex.run1.t {}{}.nex.run2.t".\
              format(nexdir, n, nexdir, n, nexdir, n)
        subprocess.check_call(cmd, shell=True)
In [3]:
import os
import numpy as np
from collections import Counter
def subsample(infile, requires, outgroup, nexdir, trim):
    """ sample the required taxa from infile to create per-locus nex files """
    ## locus counter
    loc = 0

    ## create output directory
    if not os.path.exists(nexdir):
        os.mkdir(nexdir)

    ## input .alleles file
    loci = open(infile, 'r').read().strip().split("//")

    ## create a dictionary of {names: seqs}
    for locus in xrange(len(loci)):
        locnex = [""]*len(requires)
        for line in loci[locus].strip().split("\n"):
            tax = line.split()[0]
            seq = line.split()[-1]
            if ">" in tax:
                if tax in requires:
                    locnex[requires.index(tax)] = seq

        ## if all tips have data at this locus
        if len([i for i in locnex if i]) == len(requires):
            ## if the locus is variable:
            ## count how many times each character occurs at each site
            ccs = [Counter(i) for i in np.array([list(i) for i in locnex]).T]
            ## remove N and - characters and the most common base at each site
            for i in ccs:
                del i['-']
                del i['N']
                if i:
                    del i[i.most_common()[0][0]]
            ## is anything left occurring more than once (minor allele = ma)?
            ma = max([max(i.values()) if i else 0 for i in ccs])
            if ma > 1:
                nexmake(requires, locnex, loc, outgroup, nexdir, trim)
                loc += 1

    return loc
In [162]:
## inputs
requires = [">triphyllum_D13_PWS_1783_0",
">jamesonii_D12_PWS_1636_0",
">sulcatum_D9_MEX_003_0",
">acutifolium_DRY3_MEX_006_0",
">dentatum_ELS4_0",
">recognitum_AA_1471_83B_0"]
outgroup = ""
infile = "empirical_1/fullrun/outfiles/empirical_1_full_m4.alleles"
nexdir = "nex_files1"
## run function
nloci = subsample(infile, requires, outgroup, nexdir, trim=1)
print nloci
In [161]:
## inputs
requires = [">Lib1_triphyllum_D13_PWS_1783_0",
">Lib1_jamesonii_D12_PWS_1636_0",
">Lib1_sulcatum_D9_MEX_003_0",
">Lib1_acutifolium_DRY3_MEX_006_0",
">Lib1_dentatum_ELS4_0",
">Lib1_recognitum_AA_1471_83B_0"]
outgroup = ""
infile = "empirical_1/halfrun/outfiles/empirical_1_half_m4.alleles"
nexdir = "nex_files2"
## run function
nloci = subsample(infile, requires, outgroup, nexdir, trim=6)
print nloci
In [16]:
## inputs
requires = [">clemensiae_DRY6_PWS_2135_0",
">tinus_D33_WC_277_0",
">taiwanianum_TW1_KFC_1952_0",
">lantanoides_D15_Beartown_2_0",
">amplificatum_D3_SAN_156003_0",
">lutescens_D35_PWS_2077_0",
">lentago_ELS85_0",
">dentatum_ELS4_0"]
outgroup = ""
infile = "empirical_1/fullrun/outfiles/empirical_1_full_m4.alleles"
nexdir = "nex_files5"
## run function
nloci = subsample(infile, requires, outgroup, nexdir, trim=1)
print nloci
In [17]:
## inputs
requires = [">Lib1_clemensiae_DRY6_PWS_2135_0",
">Lib1_tinus_D33_WC_277_0",
">Lib1_taiwanianum_TW1_KFC_1952_0",
">Lib1_lantanoides_D15_Beartown_2_0",
">Lib1_amplificatum_D3_SAN_156003_0",
">Lib1_lutescens_D35_PWS_2077_0",
">Lib1_lentago_ELS85_0",
">Lib1_dentatum_ELS4_0"]
outgroup = ""
infile = "empirical_1/halfrun/outfiles/empirical_1_half_m4.alleles"
nexdir = "nex_files6"
## run function
nloci = subsample(infile, requires, outgroup, nexdir, trim=6)
print nloci
In [19]:
import ipyparallel
import subprocess
import glob
## create a parallel client
ipclient = ipyparallel.Client()
lview = ipclient.load_balanced_view()
## call function across all engines
def mrbayes(infile):
    import subprocess
    cmd = "mb %s" % infile
    subprocess.check_call(cmd, shell=True)
In [164]:
## run on the full data set
res = lview.map_async(mrbayes, glob.glob("nex_files1/*"))
_ = res.get()
## run on the half data set
res = lview.map_async(mrbayes, glob.glob("nex_files2/*"))
_ = res.get()
In [72]:
## run on the half data set
res = lview.map_async(mrbayes, glob.glob("nex_files3/*"))
_ = res.get()
## run on the half data set
res = lview.map_async(mrbayes, glob.glob("nex_files4/*"))
_ = res.get()
In [20]:
## run on the half data set
res = lview.map_async(mrbayes, glob.glob("nex_files5/*"))
_ = res.get()
## run on the half data set
res = lview.map_async(mrbayes, glob.glob("nex_files6/*"))
_ = res.get()
In [21]:
import os
import subprocess
def mbsum(nexdir, nloci):
    ## create a dir for the bucky input files
    insdir = os.path.join(nexdir, "ins")
    if not os.path.exists(insdir):
        os.mkdir(insdir)

    ## combine trees from the two replicate runs
    for n in range(nloci):
        cmd = "mbsum -n 101 -o {}/{}.in {}{}.nex.run1.t {}{}.nex.run2.t".\
              format(insdir, n, nexdir, n, nexdir, n)
        subprocess.check_call(cmd, shell=True)
#mbsum("nex_files1/", 3300)
#mbsum("nex_files2/", 364)
#mbsum("nex_files3/", 1692)
#mbsum("nex_files4/", 169)
mbsum("nex_files5/", 1203)
mbsum("nex_files6/", 106)
In [22]:
args = []
for insdir in ["nex_files5/ins", "nex_files6/ins"]:
    ## independence test
    args.append("bucky --use-independence-prior -k 4 -n 500000 \
-o {}/BUCKY.ind {}/*.in".format(insdir, insdir))
    ## alpha at three levels
    for alpha in [0.1, 1, 10]:
        args.append("bucky -a {} -k 4 -n 500000 -c 4 -o {}/BUCKY.{} {}/*.in".\
                    format(alpha, insdir, alpha, insdir))

def bucky(arg):
    import subprocess
    subprocess.check_call(arg, shell=True)
    return arg

res = lview.map_async(bucky, args)
res.get()
Out[22]:
In [ ]:
del lview
ipclient.close()
In [1]:
%%bash
head -n 40 nex_files1/ins/BUCKY.0.1.concordance
In [186]:
%%bash
head -n 40 nex_files1/ins/BUCKY.1.concordance
In [185]:
%%bash
head -n 40 nex_files2/ins/BUCKY.1.concordance
In [2]:
! head -n 45 nex_files3/ins/BUCKY.0.1.concordance
In [3]:
! head -n 45 nex_files4/ins/BUCKY.0.1.concordance
In [23]:
! head -n 45 nex_files5/ins/BUCKY.0.1.concordance
In [24]:
! head -n 45 nex_files6/ins/BUCKY.0.1.concordance
In [ ]:
%%bash
## run raxml on the fullrun m2 (MinCov=2) data set
raxmlHPC-PTHREADS-AVX -f a -m GTRGAMMA -N 100 -x 12345 -p 12345 -T 20 \
-w /home/deren/Documents/RADmissing/empirical_1/fullrun \
-n empirical_1_full_m2 -s empirical_1/fullrun/outfiles/empirical_1_full_m2.phy
In [27]:
%%bash
head -n 20 empirical_1/fullrun/RAxML_info.empirical_1_full_m2
In [37]:
%%R
mean(cophenetic.phylo(ltre_full))