In [1]:
import os

from Bio import SeqIO, AlignIO, Phylo
from Bio.Align.Applications import ClustalwCommandline

# Path to the ClustalW2 executable. Set the CLUSTALW_EXE environment variable
# to point at a different install; otherwise the default Windows location
# below is used.
clustalw_exe = os.environ.get(
    "CLUSTALW_EXE",
    r"C:\Program Files (x86)\ClustalW2\clustalw2.exe",
)

In [6]:
# Years for which one human and one swine FASTA file are read.
years = [1935,1978,2009,2014]

# Maps 'h<year>' / 's<year>' to the list of records parsed from the
# corresponding human / swine FASTA file.
genomes = {}

# (dict-key format, file-name format) for each host; human first, then swine.
sources = (
    ('h%i', 'human_%i_FASTA.fa'),
    ('s%i', 'swine_%i_FASTA.fa'),
)

#Create dict w/ each complete genome as a list w/ key = h or s followed by year
for yr in years:
    for key_fmt, path_fmt in sources:
        genomes[key_fmt % yr] = list(SeqIO.parse(path_fmt % yr, 'fasta'))

In [8]:
#Concatenate all segments from each genome into their own dict entry, key = h/sYEAR_all

# Number of leading records (segments) joined per genome: records 0..7 of
# each parsed file.
N_SEGMENTS = 8


def concat_segments(records, label):
    """Join the first N_SEGMENTS records into a single record labelled `label`.

    records -- list of SeqRecord objects parsed from one genome's FASTA file.
    label   -- id assigned to the concatenated record (e.g. 'h1935').

    Returns a new SeqRecord; the records in `records` are not modified,
    because `+` builds a fresh record on every step.

    Raises IndexError if fewer than N_SEGMENTS records are available (the
    same exception type an out-of-range segment index would produce, but
    raised up front with an explanatory message).
    """
    if len(records) < N_SEGMENTS:
        raise IndexError(
            'expected at least %i segments for %s, found %i'
            % (N_SEGMENTS, label, len(records))
        )

    combined = records[0]
    for segment in records[1:N_SEGMENTS]:
        combined = combined + segment

    combined.id = label
    # Adding records whose descriptions differ leaves a placeholder
    # description, which the FASTA writer would append after the id in the
    # header line. Clear it so the header is just the label.
    combined.description = ''
    return combined


for year in years:
    genomes['h%i_all' % year] = concat_segments(genomes['h%i' % year], 'h%i' % year)
    genomes['s%i_all' % year] = concat_segments(genomes['s%i' % year], 's%i' % year)

In [35]:
#Create lists of SeqRecord objects for all human H1N1 genomes, all swine, and both combined
# The lists are derived from `years` (rather than a hard-coded first year and
# index range) so they cannot drift out of step with the genomes dict.
all_human = [genomes['h%i_all' % yr] for yr in years]
all_swine = [genomes['s%i_all' % yr] for yr in years]

# Combined list is interleaved per year: human, swine, human, swine, ...
all_seq = [rec for pair in zip(all_human, all_swine) for rec in pair]

#Write these to FASTA files, so ClustalW can align them
for records, fasta_path in (
    (all_human, 'all_human.fa'),
    (all_swine, 'all_swine.fa'),
    (all_seq, 'all_seq.fa'),
):
    SeqIO.write(records, fasta_path, 'fasta')

In [36]:
# Align all human sequences: build the ClustalW command for the human FASTA
# file, then run it. Calling the command object executes the external
# program and returns its captured stdout and stderr.
cline_human = ClustalwCommandline(
    clustalw_exe,
    infile='all_human.fa',
)
stdout, stderr = cline_human()

In [37]:
# Align all swine sequences: build the ClustalW command for the swine FASTA
# file, then run it. Calling the command object executes the external
# program and returns its captured stdout and stderr.
cline_swine = ClustalwCommandline(
    clustalw_exe,
    infile='all_swine.fa',
)
stdout, stderr = cline_swine()

In [39]:
# Align all sequences (human and swine together): build the ClustalW command
# for the combined FASTA file, then run it. Calling the command object
# executes the external program and returns its captured stdout and stderr.
cline_all = ClustalwCommandline(
    clustalw_exe,
    infile='all_seq.fa',
)
stdout, stderr = cline_all()

In [40]:
# Load each Clustal-format alignment file and print its summary.
# print(...) with a single argument produces the same output on Python 2
# and also runs on Python 3, unlike the bare print statement.
human_aln = AlignIO.read('all_human.aln','clustal')
print(human_aln)

swine_aln = AlignIO.read('all_swine.aln','clustal')
print(swine_aln)

all_aln = AlignIO.read('all_seq.aln','clustal')
print(all_aln)


SingleLetterAlphabet() alignment with 4 rows and 13334 columns
AATATGGAAAGAATAAAAGAACTACGAAATCTAATGTCGCAGTC...--- h1935
--TATGGAAAGAATAAAAGAGCTAAGGAGTCTGATGTCGCAGTC...--- h1978
---ATGGAGAGAATAAAAGAACTGAGAGATCTAATGTCGCAGTC...--- h2009
---ATGGAGAGAATAAAAGAACTGAGAGATCTAATGTCGCAGTC...TAC h2014
SingleLetterAlphabet() alignment with 4 rows and 13360 columns
TCAAATATATTCAATATGGAGAGAATAAAAGAACTAAGGGATCT...--- s1935
TCAAATATATTCAATATGGAGAGAATAAAGGAACTAAGAAATCT...--- s1978
---------------ATGGAGAGAATAAAAGAACTAAGAGATCT...CAC s2009
---------------ATGGAGAGAATAAAAGAACTGAGAGATCT...--- s2014
SingleLetterAlphabet() alignment with 8 rows and 13379 columns
---------------ATGGAGAGAATAAAAGAACTGAGAGATCT...TAC h2014
---------------ATGGAGAGAATAAAAGAACTGAGAGATCT...--- s2014
---------------ATGGAGAGAATAAAAGAACTGAGAGATCT...--- h2009
---------------ATGGAGAGAATAAAAGAACTAAGAGATCT...--- s2009
------------AATATGGAAAGAATAAAAGAACTACGAAATCT...--- h1935
--------------TATGGAAAGAATAAAAGAGCTAAGGAGTCT...--- h1978
TCAAATATATTCAATATGGAGAGAATAAAAGAACTAAGGGATCT...--- s1935
TCAAATATATTCAATATGGAGAGAATAAAGGAACTAAGAAATCT...--- s1978

In [41]:
def _read_and_draw(dnd_path):
    """Read a Newick tree from `dnd_path`, draw it as ASCII art, and return it."""
    tree = Phylo.read(dnd_path, "newick")
    Phylo.draw_ascii(tree)
    return tree


# Trees are read and drawn one at a time, in the order human, swine, combined.
human_tree = _read_and_draw('all_human.dnd')

swine_tree = _read_and_draw('all_swine.dnd')

all_tree = _read_and_draw('all_seq.dnd')
#The distance values show the number of substitutions as a proportion of the length of the alignment (excluding gaps).


  ____________ h1935
 |
_|_______________ h1978
 |
 |                                                                  __ h2009
 |_________________________________________________________________|
                                                                   |_____ h2014

  _____________________________ s1935
 |
_|__________________________ s1978
 |
 |                                     _____________________ s2009
 |____________________________________|
                                      |__________________________________ s2014

                                                          __________ h1935
                                             ____________|
                                        ____|            |______________ h1978
                                       |    |
                  _____________________|    |_______________ s1935
                 |                     |
  _______________|                     |____________________ s1978
 |               |
 |               |_____________ s2009
_|
 | h2009
 |
 |  _ h2014
 |_|
   |_ s2014