In [1]:
import os

from Bio import SeqIO, AlignIO, Phylo
from Bio.Align.Applications import ClustalwCommandline
# Path to the ClustalW2 executable used by the alignment cells.
# The CLUSTALW_EXE environment variable, when set, overrides the default
# Windows install location below, so the notebook can run on machines where
# ClustalW2 is installed somewhere else without editing this cell.
clustalw_exe = os.environ.get(
    "CLUSTALW_EXE",
    r"C:\Program Files (x86)\ClustalW2\clustalw2.exe",
)
In [6]:
# Sampling years; one human and one swine FASTA file is read for each.
years = [1935, 1978, 2009, 2014]

# Maps 'h<year>' / 's<year>' to the list of records parsed from the matching
# human / swine FASTA file (the file is read completely into a list).
genomes = {}
for yr in years:
    human_path = 'human_%i_FASTA.fa' % yr
    swine_path = 'swine_%i_FASTA.fa' % yr
    genomes['h%i' % yr] = list(SeqIO.parse(human_path, 'fasta'))
    genomes['s%i' % yr] = list(SeqIO.parse(swine_path, 'fasta'))
In [8]:
# Join the records of every genome into one record per host and year, stored
# under the keys 'h<year>_all' and 's<year>_all'.
# NOTE(review): indices 0..7 are hard-coded, i.e. exactly eight records per
# FASTA file are assumed (presumably the eight genome segments - confirm);
# a file with fewer records raises IndexError, extra records are ignored.
for yr in years:
    human_segments = genomes['h%i' % yr]
    swine_segments = genomes['s%i' % yr]

    # Start from the first record and append records 1..7 in file order.
    human_all = human_segments[0]
    swine_all = swine_segments[0]
    for idx in range(1, 8):
        human_all = human_all + human_segments[idx]
        swine_all = swine_all + swine_segments[idx]

    # Give the combined records a short id of the form 'h1935' / 's1935'.
    human_all.id = 'h' + str(yr)
    swine_all.id = 's' + str(yr)

    genomes['h%i_all' % yr] = human_all
    genomes['s%i_all' % yr] = swine_all
In [35]:
# Create lists of SeqRecord objects for all human H1N1 genomes, all swine
# genomes, and both combined.
# The lists are derived from `years` (instead of a hard-coded first year and
# range(1, 4)), so they stay in step with the cells that build `genomes`.
all_human = [genomes['h%i_all' % yr] for yr in years]
all_swine = [genomes['s%i_all' % yr] for yr in years]

# Combined list keeps the per-year interleaving: h1935, s1935, h1978, s1978, ...
all_seq = []
for human_rec, swine_rec in zip(all_human, all_swine):
    all_seq.append(human_rec)
    all_seq.append(swine_rec)

# Write these to FASTA files, so ClustalW can align them.
# The trailing ';' keeps the notebook from echoing the call's return value.
SeqIO.write(all_human, 'all_human.fa', 'fasta');
SeqIO.write(all_swine, 'all_swine.fa', 'fasta');
SeqIO.write(all_seq, 'all_seq.fa', 'fasta');
In [36]:
# Align the human genomes: run ClustalW on all_human.fa.
# all_human.aln and all_human.dnd are read back later in this notebook, so
# this run is expected to produce them next to the input file.
cline_human = ClustalwCommandline(
    clustalw_exe,
    infile='all_human.fa',
)
# Calling the command-line object runs the external program; the two values
# it returns are kept as the program's stdout and stderr text.
stdout, stderr = cline_human()
In [37]:
# Align the swine genomes: run ClustalW on all_swine.fa.
# all_swine.aln and all_swine.dnd are read back later in this notebook, so
# this run is expected to produce them next to the input file.
cline_swine = ClustalwCommandline(
    clustalw_exe,
    infile='all_swine.fa',
)
# Calling the command-line object runs the external program; the two values
# it returns are kept as the program's stdout and stderr text.
stdout, stderr = cline_swine()
In [39]:
# Align human and swine genomes together: run ClustalW on all_seq.fa.
# all_seq.aln and all_seq.dnd are read back later in this notebook, so this
# run is expected to produce them next to the input file.
cline_all = ClustalwCommandline(
    clustalw_exe,
    infile='all_seq.fa',
)
# Calling the command-line object runs the external program; the two values
# it returns are kept as the program's stdout and stderr text.
stdout, stderr = cline_all()
In [40]:
# Load the three ClustalW alignments (Clustal format) and print a text summary
# of each. print(...) with a single argument behaves the same on Python 2 and
# Python 3, unlike the Python-2-only `print x` statement.
human_aln = AlignIO.read('all_human.aln', 'clustal')
print(human_aln)

swine_aln = AlignIO.read('all_swine.aln', 'clustal')
print(swine_aln)

all_aln = AlignIO.read('all_seq.aln', 'clustal')
print(all_aln)
In [41]:
# Load the Newick trees stored in the .dnd files (presumably written by the
# ClustalW runs earlier in this notebook) for the human, swine and combined
# sequence sets.
human_tree, swine_tree, all_tree = (
    Phylo.read(dnd_path, "newick")
    for dnd_path in ('all_human.dnd', 'all_swine.dnd', 'all_seq.dnd')
)

# Draw each tree as ASCII art, in the order human, swine, combined.
for tree in (human_tree, swine_tree, all_tree):
    Phylo.draw_ascii(tree)
# The distance values show the number of substitutions as a proportion of the
# length of the alignment (excluding gaps).