In [1]:
import Bio as bio
from Bio import AlignIO
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.colors as c

%matplotlib inline

In [2]:
all_seq = AlignIO.read('all_seq.aln','clustal')
all_seq.sort()
print all_seq


SingleLetterAlphabet() alignment with 5 rows and 13387 columns
------------AATATGGAAAGAATAAAAGAACTACGAAATCT...--- 1935
TCAATTATATTCAATATGGAAAGAATAAAAGAACTAAGGAATCT...AAA 1943
TCAATTATATTCAATATGGAAAGAATAAAAGAACTAAGGAATCT...--- 1945
--------------TATGGAAAGAATAAAAGAGCTAAGGAGTCT...--- 1978
---------------ATGGAGAGAATAAAAGAACTGAGAGATCT...A-- 2009

In [3]:
def heatmap_scores(alignment,rows,cols):
    scores = np.ones((rows,cols))

    for aln in range(rows):
        for col in range(cols):
            if alignment[aln,col] == 'A':
                scores[aln,col] = 0.0
            elif alignment[aln,col] == 'T':
                scores[aln,col] = 0.25
            elif alignment[aln,col] == 'C':
                scores[aln,col] = 0.5
            elif alignment[aln,col] == 'G':
                scores[aln,col] = 0.75
            else:
                scores[aln,col] = 1.0

    scores = scores.T
    return scores

In [4]:
all_seq_scores = heatmap_scores(all_seq,5,13387)
print all_seq_scores


[[ 1.    0.25  0.25  1.    1.  ]
 [ 1.    0.5   0.5   1.    1.  ]
 [ 1.    0.    0.    1.    1.  ]
 ..., 
 [ 1.    0.    1.    1.    0.  ]
 [ 1.    0.    1.    1.    1.  ]
 [ 1.    0.    1.    1.    1.  ]]

In [5]:
#"Heatmap" plot of alignment of all sequences

fig = plt.figure(figsize=(5,13.387))
ax = fig.add_axes([0.025,0.025,0.95,0.95],frameon=False)

cMap = c.ListedColormap(['r','b','orange','purple','w'])

heatmap = ax.pcolormesh(all_seq_scores, cmap=cMap)

ax.set_ylim(0,13387)

ax.set_xticks(np.arange(all_seq_scores.shape[1])+0.5, minor=False)

col_labels = ['1935','1943','1945','1978','2009']
ax.set_xticklabels(col_labels, minor=False)

ax.invert_yaxis()
ax.xaxis.tick_top()

for tick in ax.yaxis.get_major_ticks():
    tick.tick1On=False
    tick.tick2On=False

plt.show()



In [6]:
#Do the same as above with each seg aligned separately

seg_aligns = [[],[],[],[],[],[],[],[]]
for i in range(8):
    seg_aligns[i] = AlignIO.read('segment_%i.aln'%(i+1),'clustal')
    seg_aligns[i].sort()
    print 'Segment %i: ' %(i+1) + str(seg_aligns[i])


Segment 1: SingleLetterAlphabet() alignment with 5 rows and 2309 columns
------------AATATGGAAAGAATAAAAGAACTACGAAATCT...--- CY019962
TCAATTATATTCAATATGGAAAGAATAAAAGAACTAAGGAATCT...--- CY020292
TCAATTATATTCAATATGGAAAGAATAAAAGAACTAAGGAATCT...TTA CY021716
--------------TATGGAAAGAATAAAAGAGCTAAGGAGTCT...--- xCY019970
---------------ATGGAGAGAATAAAAGAACTGAGAGATCT...--- zCY039900
Segment 2: SingleLetterAlphabet() alignment with 5 rows and 2305 columns
-----AATGGATGTCAATCCGACCTTACTTTTCTTAAAAGTGCC...--- CY019961
------ATGGATGTCAATCCGACCTTACTTTTCTTAAAAGTGCC...--- CY020291
-----AATGGATGTCAATCCGATCTTACTTTTCTTAAAAGTTCC...--- CY021715
-----AATGGATGTCAATCCGACCTTACTTTTCTTAAAAGTGCC...--- xCY019969
ATTTGAATGGATGTCAATCCGACTCTACTTTTCCTAAAAATTCC...AAA zCY039899
Segment 3: SingleLetterAlphabet() alignment with 5 rows and 2206 columns
------ATCCAAAATGGAAGATTTTGTGCGACAATGCTTCAATC...--- CY019960
------ATCCGAAATGGAAGATTTTGTGCGACAATGCTTCAATC...--- CY020290
GTACTGATCCGAAATGGAAGATTTTGTGCGACAATGCTTCAATC...AAA CY021714
------ATCCGAAATGGAAGATTTTGTGCGACAATGCTTCAATC...--- xCY019968
----TGATCCAAAATGGAAGACTTTGTGCGACAATGCTTCAATC...--- zCY039898
Segment 4: SingleLetterAlphabet() alignment with 5 rows and 1751 columns
--------AACAACCAAAATGAAGGCAAACCTACTGGTCCTGTT...--- CY019955
--------AACAACCAAAATGAAAGCAAGACTACTGGTCCTGTT...A-- CY020285
AAAATAAAAACAACCAAAATGAAGGCAAGACTACTGGTCCTGTT...AAC CY021709
--------AACAACCAAAATGAAAGCAAAACTACTGGTCCTGTT...--- xCY019963
-----AAAAGCAACAAAAATGAAGGCAATACTAGTAGTTCTGCT...--- zCY039893
Segment 5: SingleLetterAlphabet() alignment with 5 rows and 1534 columns
------ACTCACTGAGTGACATCAAAATCATGGCGTCTCAAGGC...AAT CY019958
----TCACTCACTGAGTGACATCAAAATCATGGCGTCCCAAGGC...AAT CY020288
ATAATCACTCACTGAGTGACATCAAAATCATGGCGTCCCAAGGC...AAT CY021712
------ACTCACTGAGTGACATCAAAATCATGGCGTCCCAAGGC...--- xCY019966
----TCACTCAATGAGTGACATCGAAGCCATGGCGTCTCAAGGC...--- zCY039896
Segment 6: SingleLetterAlphabet() alignment with 5 rows and 1432 columns
-----AATGAATCCAAATCAGAAAATAATAACCATTGGATCAAT...--- CY019957
------ATGAATCCAAATCAGAAAATAATAACCATTGGATCAAT...--- CY020287
TTTAAAATGAATCCAAATCAGAAAATAATAACCATTGGATCAAT...AAA CY021711
------ATGAATCCAAATCAGAAAATAATAACCATTGGATCAAT...--- xCY019965
----AAATGAATCCAAACCAAAAGATAATAACCATTGGTTCGGT...--- zCY039895
Segment 7: SingleLetterAlphabet() alignment with 5 rows and 1000 columns
------------AGATGAGTCTTCTAACCGAGGTCGAAACGTAC...A-- CY019956
------------AGATGAGTCTTCTAACCGAGGTCGAAACGTAC...AAA CY020286
GTAGATATTGAAAGATGAGTCTTCTAACCGAGGTCGAAACGTAC...AAA CY021710
-------------GATGAGTCTTCTAACCGAGGTCGAAACGTAC...A-- xCY019964
---------TAAAGATGAGTCTTCTAACCGAGGTCGAAACGTAC...--- zCY039894
Segment 8: SingleLetterAlphabet() alignment with 5 rows and 859 columns
-------AACATAATGGATCCAAACACTGTGTCAAGCTTTCAGG...--- CY019959
----------ATAATGGATCCCAACACTGTGTCAAGCTTTCAGG...AAA CY020289
TGGCAAAAACATAATGGATCCCAACACTGTGTCAAGCTTTCAGG...--- CY021713
----------ATAATGGATCCCAACACTGTGTCAAGCTTTCAGG...--- xCY019967
------AAACATAATGGACTCCAACACCATGTCAAGCTTTCAGG...A-- zCY039897

In [7]:
seg_scores = heatmap_scores(seg_aligns[0],5,2309) #seg 1
seg_scores = np.concatenate((seg_scores,heatmap_scores(seg_aligns[1],5,2305)),axis=0) #seg 2
seg_scores = np.concatenate((seg_scores,heatmap_scores(seg_aligns[2],5,2206)),axis=0) #seg 3
seg_scores = np.concatenate((seg_scores,heatmap_scores(seg_aligns[3],5,1751)),axis=0) #seg 4
seg_scores = np.concatenate((seg_scores,heatmap_scores(seg_aligns[4],5,1534)),axis=0) #seg 5
seg_scores = np.concatenate((seg_scores,heatmap_scores(seg_aligns[5],5,1432)),axis=0) #seg 6
seg_scores = np.concatenate((seg_scores,heatmap_scores(seg_aligns[6],5,1000)),axis=0) #seg 7
seg_scores = np.concatenate((seg_scores,heatmap_scores(seg_aligns[7],5,859)),axis=0) #seg 8

In [8]:
fig2 = plt.figure(figsize=(5,13.396))
ax2 = fig2.add_axes([0.025,0.025,0.95,0.95],frameon=False)

cMap = c.ListedColormap(['r','b','orange','purple','w'])

heatmap2 = ax2.pcolormesh(seg_scores, cmap=cMap)

ax2.set_ylim(0,13396)

ax2.set_xticks(np.arange(seg_scores.shape[1])+0.5, minor=False)
col_labels = ['1935','1943','1945','1978','2009']
ax2.set_xticklabels(col_labels, minor=False)

seg_positions = [0,2310,4615,6821,8572,10106,11538,12538]
ax2.set_yticks(seg_positions, minor=False)
row_labels = ['Seg 1','Seg 2','Seg 3','Seg 4','Seg 5','Seg 6','Seg 7','Seg 8']
ax2.set_yticklabels(row_labels, minor=False)

ax2.invert_yaxis()
ax2.xaxis.tick_top()

for tick in ax2.yaxis.get_major_ticks():
    tick.tick1On=False
    tick.tick2On=False


plt.show()