This could be a way to quantify the differences between genomes in an alignment. It's a work in progress and is not very well commented yet.
In [1]:
import Bio
from Bio import AlignIO
from Bio import Phylo
import inspect
import os
# Directory that holds the alignment files read by the cells below
# (all_seq.aln and all_seq.dnd). Change this one value to point the
# notebook at a different set of alignment files.
aligned_dir = "Biopython Alignment"
In [2]:
# Load the Clustal multiple-sequence alignment and show a summary of it.
# Printing is only for inspection, but the `alignment` object itself is
# required further down (its length is used to scale the branch lengths),
# so this cell must be run.
alignment = AlignIO.read(os.path.join(aligned_dir, 'all_seq.aln'), 'clustal')
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(alignment)
From the Biopython Phylo documentation: the distance values show the number of substitutions as a proportion of the length of the alignment (excluding gaps).
In [3]:
# Read the Newick tree (.dnd) that sits next to the alignment.
# Background reading on the Phylo module:
#   http://biopython.org/wiki/Phylo
#   http://biopython.org/wiki/Phylo_cookbook
tree = Phylo.read(
    file=os.path.join(aligned_dir, 'all_seq.dnd'),
    format="newick",
)
In [8]:
# Show the tree twice: first as an ASCII-art dendrogram, then as the
# printed form of the tree object (the printed form makes it easy to see
# which clades have names and which do not).
Phylo.draw_ascii(tree)
# print(...) with a single argument behaves the same on Python 2 and Python 3.
print(tree)
In [5]:
# Flatten the tree into a list of all of its elements, then keep only the
# elements that carry a name.
elements = list(tree.find_elements())
# `is not None` is the idiomatic identity check for None; unlike `!= None`
# it cannot be affected by a custom __ne__ on the element.
named_elems = [elem for elem in elements if elem.name is not None]
# Bare last expression so the notebook displays the filtered list.
named_elems
Out[5]:
In [9]:
# Collect the branch length of every named element and print, per element,
# the raw branch length followed by the branch length scaled by the
# alignment length.
# NOTE(review): the scaled value is probably not needed -- comparing the raw
# branch lengths directly should be enough.
# NOTE(review): the branch lengths are described above as proportions of the
# alignment length excluding gaps; confirm that get_alignment_length() is the
# right scale factor before relying on the scaled value.
alignment_length = alignment.get_alignment_length()  # loop-invariant, computed once
branch_lens = []
for elem in named_elems:
    branch_lens.append(elem.branch_length)
    # One line per element: "<branch length> <branch length * alignment length>".
    # %-formatting keeps this valid on both Python 2 and Python 3.
    print("%s %s" % (elem.branch_length, elem.branch_length * alignment_length))
In [14]:
# Compare the summed branch lengths of the named elements with the total
# branch length of the whole tree.
print(sum(branch_lens))
# If you print the tree you can see that several of the clades do not have
# names, which is why sum(branch_lens) != tree.total_branch_length().
print(tree.total_branch_length())