In [1]:
%matplotlib inline
from Bio import SeqIO    
import numpy as np
import pandas as pd    
import matplotlib.pyplot as plt

In [12]:
def get_qualitites(filename, fmt=None):
    '''Get the qualities from the FASTQ'''
    if fmt is None:
        fmt = filename.split('.')[-1]
    
    L = max(len(read) for read in SeqIO.parse(filename, fmt))

    df = pd.DataFrame((read.letter_annotations['phred_quality'] + [np.nan] * (L - len(read))
                      for read in SeqIO.parse(filename, fmt)),
                      dtype=np.float64)
    return df

In [2]:
def readsToPanel (filename, fmt=None):
    '''Converts the sequences and quality scores from a sequence file to a Pandas panel.
    The items are 'sequence' and 'quality', the major axis is each read, the minor axis
    is the base or quality score.
    
    Parameters:
        filename = the name of the file
        fmt = the format of the file, defaults to the extension of the filename.
    '''
    if fmt is None:
        fmt = filename.split('.')[-1]
        
    size = max(len(read) for read in SeqIO.parse(filename, fmt))
            
    seqs = pd.DataFrame(((i) for i in read.seq) for read in SeqIO.parse(filename, fmt))
    
    quals = pd.DataFrame((read.letter_annotations['phred_quality'] + [np.nan] * (size - len(read))
                      for read in SeqIO.parse(filename, fmt)),
                      dtype=np.float64)
    
    data = pd.Panel ({'sequence' : seqs, 'quality' : quals})
    
    return data

In [3]:
def qualityByPosition(data):
    '''This function returns the mean of each base position in
    a read in a panel.

    Parameters
        data: The panel with the reads from the readsToPanel function.
    '''
    means = []
    for i in data.loc['quality']:
        means.append(np.mean(data.loc['quality'][i]))
    
    return means

In [4]:
def qualityByRead(data):
    '''This function returns the mean of each read in a panel.

    Parameters
        data: The panel with the reads from the readsToPanel function.
    '''
    means = []
    for i in data['quality'].transpose():
         means.append(np.mean(data['quality'].transpose()[i]))

    return means

In [5]:
def qualityByBase(data):
    '''This function prints the mean of each base in a dataframe.

    Parameters
        data: The panel with the reads from the readsToPanel function.
    '''
    a = []
    t = []
    c = []
    g = []
    
    for i, position in enumerate(data['sequence']):
        for j, base in enumerate(data['sequence'][i]):
            if base == 'A':
                a.append(data['quality'][i][j])
            elif base == 'T':
                t.append(data['quality'][i][j])
            elif base == 'C':
                c.append(data['quality'][i][j])
            elif base == 'G':
                g.append(data['quality'][i][j])

    print ("A: mean: ", np.mean(a), " standard deviation: ", np.std(a))
    print ("T: mean: ", np.mean(t), " standard deviation: ", np.std(t))
    print ("C: mean: ", np.mean(c), " standard deviation: ", np.std(c))
    print ("G: mean: ", np.mean(g), " standard deviation: ", np.std(g))
     
    return

In [54]:
def consensusSequence (data):
    '''Returns a string and a list of dictionaries. The string contains the consensus
    sequence selected by determining which base had the most 'votes' at a given position.
    Each dictionary contains the number of 'votes' for a base in a given position by
    tallying all the reads at that position.
    
    Parameters
        data: The panel with the reads from the readsToPanel function.
    '''
    rawConsensus = []
    string = ""
    for i in data.loc['sequence']:
        counts = { 'A' : 0, 'T' : 0, 'C' : 0, 'G' : 0 }
        for base in data.loc['sequence'][i]:
            if base == 'A':
                counts['A'] += 1
            elif base == 'T':
                counts['T'] += 1
            elif base == 'C':
                counts['C'] += 1
            elif base == 'G':  
                counts['G'] =+ 1
        # Consensus sequence looked strange so I wanted to see raw counts for each position.
        rawConsensus.append(counts)
        string += max(counts, key=counts.get)
    
    return string, rawConsensus

In [ ]:
def qualityByFiveBases(data):
    '''NOT YET FUNCTIONAL
    
    This function identifies the sequence and average quality of the ten lowest 
    quality five base groups.

    Parameters
        data: The panel with the reads from the readsToPanel function.
    '''
    #start with quality by positions scores.
            #calculate mean of every five numbers
            #
    
    #dictionary with ten entries

In [269]:
fig, ax = plt.subplots(1, 1, figsize=(10, 8))
y = a.mean(axis=0)
x = np.arange(len(y)) + 1
dy = a.std(axis=0)
ax.plot(x, y, lw=2, color='k')
ax.fill_between(x, y-dy, y+dy, color='k', alpha=0.3)
ax.set_xlabel('Position in read [bp]')
ax.set_ylabel('Average phred quality at position')
ax.grid(True)

n_reads_pos = (a.shape[0] - np.isnan(a).sum(axis=0)) / 200.
ax.bar(x-0.5, n_reads_pos, 1, facecolor='steelblue', edgecolor='blue', alpha=0.5, lw=1.5)

plt.tight_layout()



In [7]:
data = readsToPanel('../data/SRR2153267.fastq')

In [8]:
qualityByBase(data)


A: mean:  37.3429800449  standard deviation:  5.35899249013
T: mean:  37.5204021743  standard deviation:  5.35358340761
C: mean:  35.1542051234  standard deviation:  7.52161420811
G: mean:  35.8320445617  standard deviation:  7.12377956703

In [10]:
plt.hist(qualityByRead(data))


Out[10]:
(array([    6.,    17.,    31.,    78.,   191.,   456.,   819.,  1112.,
          999.,   345.]),
 array([ 28.35275081,  29.46763247,  30.58251413,  31.69739579,
         32.81227744,  33.9271591 ,  35.04204076,  36.15692242,
         37.27180408,  38.38668574,  39.5015674 ]),
 <a list of 10 Patch objects>)

In [53]:
df = data.loc['quality']

fig, ax = plt.subplots(1, 1, figsize=(10, 8))
y = df.mean(axis=0)
x = np.arange(len(y)) + 1
dy = df.std(axis=0)
ax.plot(x, y, lw=2, color='k')
ax.fill_between(x, y-dy, y+dy, color='k', alpha=0.3)

ax.set_xlabel('Position in read [bp]')
ax.set_ylabel('Average phred quality at position')
ax.grid(True)

# Data for the subplot. Shape returns a tuple (an immutable list) of axes,
# np.isnan returns a boolean array of nan values, sum adds each together on
# the given axis. Result is a list with a value for reads per position.
n_reads_pos = (df.shape[0] - np.isnan(df).sum(axis=0)) / 200.

# Makes a bar graph, x coordinate of bar, height of bar, width, etc. bar edge width 
ax.bar(x-.5, n_reads_pos, 1, facecolor='steelblue', edgecolor='blue', alpha=0.5, lw=1.5)

# Adjust subplot paramters to fit graph.
plt.tight_layout()



In [55]:
consensusSequence(data)


Out[55]:
('CAATCCGTAACTTACCTAAAAAAACCTTAACCATTATTCCCTCCCCCCAATAATCAAAACCATAATAATAACCACCACCACCACCACCACCACCATCATCCTCCTCCTCCTCCTCCTCCTCCTCCTTCTTCATCATCTTCTTCTTCTTCTTCTTCTTCTTCTTTTTTTTTTTTTTCATTATCATCTTCTTCTTCTTTTTTTTTTTTTTATTATTATTATTATTACTACTACTACTCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCCACCACAAAAACAAAAAAAAAAAAACAACCCCACCCCCCCCCCCCCCCCCCAACCCCCAAACCACAAAAACCAAAACCACCAAAACACCAAAACCCAATAAACCTACCTCCGGTGCTGTGGTGGGGCGCGGTAGGGATACG',
 [{'A': 0, 'C': 4054, 'G': 0, 'T': 0},
  {'A': 4054, 'C': 0, 'G': 0, 'T': 0},
  {'A': 4054, 'C': 0, 'G': 0, 'T': 0},
  {'A': 0, 'C': 0, 'G': 0, 'T': 4054},
  {'A': 0, 'C': 4054, 'G': 0, 'T': 0},
  {'A': 0, 'C': 4054, 'G': 0, 'T': 0},
  {'A': 0, 'C': 0, 'G': 1, 'T': 0},
  {'A': 0, 'C': 0, 'G': 0, 'T': 4054},
  {'A': 4054, 'C': 0, 'G': 0, 'T': 0},
  {'A': 4054, 'C': 0, 'G': 0, 'T': 0},
  {'A': 2, 'C': 4046, 'G': 0, 'T': 6},
  {'A': 1, 'C': 2, 'G': 0, 'T': 4051},
  {'A': 8, 'C': 1, 'G': 1, 'T': 4044},
  {'A': 4041, 'C': 10, 'G': 0, 'T': 3},
  {'A': 3, 'C': 4048, 'G': 1, 'T': 2},
  {'A': 0, 'C': 4036, 'G': 1, 'T': 16},
  {'A': 2, 'C': 3, 'G': 1, 'T': 4030},
  {'A': 21, 'C': 0, 'G': 1, 'T': 5},
  {'A': 4025, 'C': 1, 'G': 1, 'T': 1},
  {'A': 10, 'C': 1, 'G': 1, 'T': 0},
  {'A': 28, 'C': 1, 'G': 1, 'T': 0},
  {'A': 4011, 'C': 1, 'G': 1, 'T': 2},
  {'A': 40, 'C': 4, 'G': 1, 'T': 2},
  {'A': 4001, 'C': 34, 'G': 1, 'T': 3},
  {'A': 12, 'C': 3994, 'G': 1, 'T': 4},
  {'A': 1, 'C': 11, 'G': 1, 'T': 7},
  {'A': 3, 'C': 2, 'G': 1, 'T': 48},
  {'A': 10, 'C': 2, 'G': 1, 'T': 3981},
  {'A': 62, 'C': 11, 'G': 1, 'T': 12},
  {'A': 3960, 'C': 79, 'G': 1, 'T': 3},
  {'A': 17, 'C': 4028, 'G': 1, 'T': 4},
  {'A': 47, 'C': 3974, 'G': 1, 'T': 14},
  {'A': 2902, 'C': 29, 'G': 1, 'T': 32},
  {'A': 25, 'C': 35, 'G': 1, 'T': 1472},
  {'A': 34, 'C': 25, 'G': 1, 'T': 502},
  {'A': 83, 'C': 29, 'G': 1, 'T': 77},
  {'A': 24, 'C': 63, 'G': 1, 'T': 3936},
  {'A': 15, 'C': 1553, 'G': 1, 'T': 2375},
  {'A': 3, 'C': 4022, 'G': 1, 'T': 14},
  {'A': 4, 'C': 4010, 'G': 1, 'T': 35},
  {'A': 59, 'C': 2452, 'G': 1, 'T': 1523},
  {'A': 31, 'C': 101, 'G': 1, 'T': 3823},
  {'A': 6, 'C': 27, 'G': 1, 'T': 15},
  {'A': 38, 'C': 74, 'G': 1, 'T': 4},
  {'A': 9, 'C': 4010, 'G': 1, 'T': 7},
  {'A': 9, 'C': 4022, 'G': 1, 'T': 6},
  {'A': 21, 'C': 3987, 'G': 1, 'T': 17},
  {'A': 79, 'C': 3922, 'G': 1, 'T': 31},
  {'A': 3872, 'C': 60, 'G': 1, 'T': 20},
  {'A': 172, 'C': 53, 'G': 1, 'T': 71},
  {'A': 1655, 'C': 71, 'G': 1, 'T': 1761},
  {'A': 1761, 'C': 1090, 'G': 1, 'T': 553},
  {'A': 660, 'C': 225, 'G': 1, 'T': 147},
  {'A': 111, 'C': 230, 'G': 1, 'T': 3580},
  {'A': 165, 'C': 3561, 'G': 1, 'T': 195},
  {'A': 2022, 'C': 1256, 'G': 1, 'T': 150},
  {'A': 3366, 'C': 216, 'G': 1, 'T': 232},
  {'A': 2216, 'C': 348, 'G': 1, 'T': 1219},
  {'A': 1886, 'C': 710, 'G': 1, 'T': 289},
  {'A': 410, 'C': 1699, 'G': 1, 'T': 979},
  {'A': 1172, 'C': 2023, 'G': 1, 'T': 377},
  {'A': 978, 'C': 651, 'G': 1, 'T': 412},
  {'A': 437, 'C': 1038, 'G': 1, 'T': 2049},
  {'A': 1692, 'C': 1164, 'G': 1, 'T': 734},
  {'A': 949, 'C': 937, 'G': 1, 'T': 471},
  {'A': 664, 'C': 1072, 'G': 1, 'T': 1601},
  {'A': 1656, 'C': 1062, 'G': 1, 'T': 710},
  {'A': 1005, 'C': 877, 'G': 1, 'T': 590},
  {'A': 558, 'C': 1298, 'G': 1, 'T': 1498},
  {'A': 1408, 'C': 1251, 'G': 1, 'T': 844},
  {'A': 1106, 'C': 970, 'G': 1, 'T': 544},
  {'A': 666, 'C': 1363, 'G': 1, 'T': 1331},
  {'A': 1280, 'C': 1353, 'G': 1, 'T': 909},
  {'A': 1230, 'C': 917, 'G': 1, 'T': 709},
  {'A': 744, 'C': 1415, 'G': 1, 'T': 1097},
  {'A': 1131, 'C': 1305, 'G': 1, 'T': 1076},
  {'A': 1377, 'C': 956, 'G': 1, 'T': 713},
  {'A': 746, 'C': 1461, 'G': 1, 'T': 1063},
  {'A': 967, 'C': 1438, 'G': 1, 'T': 1089},
  {'A': 1362, 'C': 1144, 'G': 1, 'T': 674},
  {'A': 718, 'C': 1536, 'G': 1, 'T': 934},
  {'A': 1064, 'C': 1411, 'G': 1, 'T': 1028},
  {'A': 1322, 'C': 1097, 'G': 1, 'T': 754},
  {'A': 706, 'C': 1442, 'G': 1, 'T': 1015},
  {'A': 1142, 'C': 1407, 'G': 1, 'T': 1020},
  {'A': 1330, 'C': 1073, 'G': 1, 'T': 780},
  {'A': 613, 'C': 1408, 'G': 1, 'T': 1175},
  {'A': 1266, 'C': 1321, 'G': 1, 'T': 940},
  {'A': 1410, 'C': 1116, 'G': 1, 'T': 734},
  {'A': 617, 'C': 1318, 'G': 1, 'T': 1166},
  {'A': 1283, 'C': 1399, 'G': 1, 'T': 907},
  {'A': 1434, 'C': 1016, 'G': 1, 'T': 686},
  {'A': 757, 'C': 1247, 'G': 1, 'T': 1185},
  {'A': 1413, 'C': 1423, 'G': 1, 'T': 806},
  {'A': 1357, 'C': 1101, 'G': 1, 'T': 673},
  {'A': 698, 'C': 1070, 'G': 1, 'T': 1297},
  {'A': 1357, 'C': 1453, 'G': 1, 'T': 819},
  {'A': 1287, 'C': 1188, 'G': 1, 'T': 592},
  {'A': 705, 'C': 1000, 'G': 1, 'T': 1241},
  {'A': 1305, 'C': 1606, 'G': 1, 'T': 802},
  {'A': 1157, 'C': 1223, 'G': 1, 'T': 570},
  {'A': 699, 'C': 997, 'G': 1, 'T': 1225},
  {'A': 1165, 'C': 1706, 'G': 1, 'T': 802},
  {'A': 1042, 'C': 1176, 'G': 1, 'T': 595},
  {'A': 619, 'C': 835, 'G': 1, 'T': 1291},
  {'A': 1098, 'C': 1761, 'G': 1, 'T': 854},
  {'A': 970, 'C': 1295, 'G': 1, 'T': 609},
  {'A': 648, 'C': 753, 'G': 1, 'T': 1266},
  {'A': 1036, 'C': 1749, 'G': 1, 'T': 915},
  {'A': 935, 'C': 1319, 'G': 1, 'T': 522},
  {'A': 641, 'C': 797, 'G': 1, 'T': 1254},
  {'A': 897, 'C': 1837, 'G': 1, 'T': 885},
  {'A': 843, 'C': 1392, 'G': 1, 'T': 480},
  {'A': 724, 'C': 771, 'G': 1, 'T': 1165},
  {'A': 812, 'C': 1762, 'G': 1, 'T': 996},
  {'A': 840, 'C': 1283, 'G': 1, 'T': 634},
  {'A': 676, 'C': 831, 'G': 1, 'T': 1173},
  {'A': 761, 'C': 1582, 'G': 1, 'T': 1206},
  {'A': 638, 'C': 1452, 'G': 1, 'T': 574},
  {'A': 819, 'C': 868, 'G': 1, 'T': 1084},
  {'A': 607, 'C': 1617, 'G': 1, 'T': 1267},
  {'A': 720, 'C': 1518, 'G': 1, 'T': 565},
  {'A': 939, 'C': 887, 'G': 1, 'T': 1072},
  {'A': 612, 'C': 1413, 'G': 1, 'T': 1250},
  {'A': 625, 'C': 1440, 'G': 1, 'T': 563},
  {'A': 954, 'C': 845, 'G': 1, 'T': 1122},
  {'A': 555, 'C': 1267, 'G': 1, 'T': 1421},
  {'A': 661, 'C': 1528, 'G': 1, 'T': 580},
  {'A': 1032, 'C': 835, 'G': 1, 'T': 1179},
  {'A': 618, 'C': 1048, 'G': 1, 'T': 1534},
  {'A': 562, 'C': 1625, 'G': 1, 'T': 571},
  {'A': 1238, 'C': 772, 'G': 1, 'T': 1098},
  {'A': 631, 'C': 1015, 'G': 1, 'T': 1407},
  {'A': 517, 'C': 1572, 'G': 1, 'T': 580},
  {'A': 1188, 'C': 690, 'G': 1, 'T': 1138},
  {'A': 671, 'C': 991, 'G': 1, 'T': 1369},
  {'A': 529, 'C': 1481, 'G': 1, 'T': 679},
  {'A': 1138, 'C': 658, 'G': 1, 'T': 1246},
  {'A': 767, 'C': 767, 'G': 1, 'T': 1452},
  {'A': 552, 'C': 1536, 'G': 1, 'T': 618},
  {'A': 1224, 'C': 632, 'G': 1, 'T': 1321},
  {'A': 715, 'C': 774, 'G': 1, 'T': 1518},
  {'A': 503, 'C': 1520, 'G': 1, 'T': 665},
  {'A': 1109, 'C': 596, 'G': 1, 'T': 1328},
  {'A': 858, 'C': 735, 'G': 1, 'T': 1449},
  {'A': 477, 'C': 1488, 'G': 1, 'T': 573},
  {'A': 1004, 'C': 587, 'G': 1, 'T': 1327},
  {'A': 840, 'C': 638, 'G': 1, 'T': 1606},
  {'A': 437, 'C': 1376, 'G': 1, 'T': 627},
  {'A': 874, 'C': 544, 'G': 1, 'T': 1458},
  {'A': 844, 'C': 614, 'G': 1, 'T': 1754},
  {'A': 416, 'C': 1402, 'G': 1, 'T': 652},
  {'A': 950, 'C': 429, 'G': 1, 'T': 1488},
  {'A': 804, 'C': 718, 'G': 1, 'T': 1731},
  {'A': 330, 'C': 1233, 'G': 1, 'T': 743},
  {'A': 834, 'C': 522, 'G': 1, 'T': 1440},
  {'A': 829, 'C': 754, 'G': 1, 'T': 1672},
  {'A': 400, 'C': 990, 'G': 1, 'T': 803},
  {'A': 771, 'C': 457, 'G': 1, 'T': 1476},
  {'A': 839, 'C': 833, 'G': 1, 'T': 1802},
  {'A': 380, 'C': 999, 'G': 1, 'T': 840},
  {'A': 830, 'C': 547, 'G': 1, 'T': 1404},
  {'A': 845, 'C': 779, 'G': 1, 'T': 1797},
  {'A': 375, 'C': 911, 'G': 1, 'T': 940},
  {'A': 851, 'C': 547, 'G': 1, 'T': 1212},
  {'A': 882, 'C': 794, 'G': 1, 'T': 1765},
  {'A': 352, 'C': 816, 'G': 1, 'T': 915},
  {'A': 942, 'C': 567, 'G': 1, 'T': 1094},
  {'A': 840, 'C': 746, 'G': 1, 'T': 1857},
  {'A': 367, 'C': 834, 'G': 1, 'T': 913},
  {'A': 925, 'C': 726, 'G': 1, 'T': 1078},
  {'A': 766, 'C': 854, 'G': 1, 'T': 1883},
  {'A': 387, 'C': 739, 'G': 1, 'T': 905},
  {'A': 900, 'C': 847, 'G': 1, 'T': 937},
  {'A': 705, 'C': 920, 'G': 1, 'T': 1810},
  {'A': 430, 'C': 889, 'G': 1, 'T': 867},
  {'A': 1025, 'C': 854, 'G': 1, 'T': 930},
  {'A': 793, 'C': 835, 'G': 1, 'T': 1734},
  {'A': 417, 'C': 821, 'G': 1, 'T': 869},
  {'A': 1023, 'C': 994, 'G': 1, 'T': 950},
  {'A': 722, 'C': 910, 'G': 1, 'T': 1641},
  {'A': 451, 'C': 848, 'G': 1, 'T': 821},
  {'A': 1039, 'C': 898, 'G': 1, 'T': 981},
  {'A': 701, 'C': 944, 'G': 1, 'T': 1557},
  {'A': 399, 'C': 996, 'G': 1, 'T': 880},
  {'A': 967, 'C': 847, 'G': 1, 'T': 1014},
  {'A': 856, 'C': 929, 'G': 1, 'T': 1440},
  {'A': 402, 'C': 1010, 'G': 1, 'T': 884},
  {'A': 966, 'C': 841, 'G': 1, 'T': 1045},
  {'A': 849, 'C': 947, 'G': 1, 'T': 1360},
  {'A': 440, 'C': 941, 'G': 1, 'T': 810},
  {'A': 802, 'C': 787, 'G': 1, 'T': 1342},
  {'A': 898, 'C': 1032, 'G': 1, 'T': 1318},
  {'A': 489, 'C': 925, 'G': 1, 'T': 788},
  {'A': 795, 'C': 750, 'G': 1, 'T': 1423},
  {'A': 865, 'C': 1091, 'G': 1, 'T': 1287},
  {'A': 511, 'C': 851, 'G': 1, 'T': 901},
  {'A': 705, 'C': 709, 'G': 1, 'T': 1506},
  {'A': 966, 'C': 958, 'G': 1, 'T': 1306},
  {'A': 680, 'C': 734, 'G': 1, 'T': 795},
  {'A': 665, 'C': 869, 'G': 1, 'T': 1446},
  {'A': 1045, 'C': 950, 'G': 1, 'T': 1344},
  {'A': 661, 'C': 809, 'G': 1, 'T': 833},
  {'A': 525, 'C': 952, 'G': 1, 'T': 1503},
  {'A': 883, 'C': 970, 'G': 1, 'T': 1515},
  {'A': 845, 'C': 683, 'G': 1, 'T': 914},
  {'A': 595, 'C': 972, 'G': 1, 'T': 1471},
  {'A': 890, 'C': 1111, 'G': 1, 'T': 1441},
  {'A': 998, 'C': 700, 'G': 1, 'T': 895},
  {'A': 542, 'C': 1102, 'G': 1, 'T': 1466},
  {'A': 943, 'C': 878, 'G': 1, 'T': 1574},
  {'A': 1191, 'C': 612, 'G': 1, 'T': 774},
  {'A': 485, 'C': 1162, 'G': 1, 'T': 1507},
  {'A': 984, 'C': 1060, 'G': 1, 'T': 1448},
  {'A': 1308, 'C': 572, 'G': 1, 'T': 829},
  {'A': 548, 'C': 1157, 'G': 1, 'T': 1598},
  {'A': 801, 'C': 1046, 'G': 1, 'T': 1673},
  {'A': 1375, 'C': 615, 'G': 1, 'T': 700},
  {'A': 712, 'C': 1180, 'G': 1, 'T': 1405},
  {'A': 934, 'C': 1035, 'G': 1, 'T': 1591},
  {'A': 1401, 'C': 704, 'G': 1, 'T': 740},
  {'A': 682, 'C': 1295, 'G': 1, 'T': 1361},
  {'A': 1047, 'C': 1040, 'G': 1, 'T': 1544},
  {'A': 1402, 'C': 828, 'G': 1, 'T': 753},
  {'A': 806, 'C': 1352, 'G': 1, 'T': 1190},
  {'A': 882, 'C': 1110, 'G': 1, 'T': 1602},
  {'A': 1372, 'C': 901, 'G': 1, 'T': 802},
  {'A': 944, 'C': 1388, 'G': 1, 'T': 1084},
  {'A': 908, 'C': 1174, 'G': 1, 'T': 1560},
  {'A': 1286, 'C': 1136, 'G': 1, 'T': 769},
  {'A': 955, 'C': 1404, 'G': 1, 'T': 1088},
  {'A': 998, 'C': 1264, 'G': 1, 'T': 1391},
  {'A': 1267, 'C': 1246, 'G': 1, 'T': 744},
  {'A': 1018, 'C': 1344, 'G': 1, 'T': 1091},
  {'A': 909, 'C': 1303, 'G': 1, 'T': 1333},
  {'A': 1048, 'C': 1446, 'G': 1, 'T': 797},
  {'A': 1097, 'C': 1372, 'G': 1, 'T': 989},
  {'A': 958, 'C': 1332, 'G': 1, 'T': 1200},
  {'A': 965, 'C': 1678, 'G': 1, 'T': 720},
  {'A': 986, 'C': 1498, 'G': 1, 'T': 1085},
  {'A': 819, 'C': 1558, 'G': 1, 'T': 1139},
  {'A': 858, 'C': 1784, 'G': 1, 'T': 762},
  {'A': 1054, 'C': 1453, 'G': 1, 'T': 1002},
  {'A': 785, 'C': 1526, 'G': 1, 'T': 1072},
  {'A': 755, 'C': 1841, 'G': 1, 'T': 822},
  {'A': 954, 'C': 1565, 'G': 1, 'T': 924},
  {'A': 662, 'C': 1718, 'G': 1, 'T': 878},
  {'A': 667, 'C': 1968, 'G': 1, 'T': 751},
  {'A': 802, 'C': 1595, 'G': 1, 'T': 990},
  {'A': 629, 'C': 1617, 'G': 1, 'T': 903},
  {'A': 644, 'C': 1985, 'G': 1, 'T': 732},
  {'A': 776, 'C': 1505, 'G': 1, 'T': 942},
  {'A': 591, 'C': 1581, 'G': 1, 'T': 815},
  {'A': 560, 'C': 1930, 'G': 1, 'T': 796},
  {'A': 733, 'C': 1531, 'G': 1, 'T': 930},
  {'A': 601, 'C': 1517, 'G': 1, 'T': 731},
  {'A': 561, 'C': 1863, 'G': 1, 'T': 741},
  {'A': 709, 'C': 1542, 'G': 1, 'T': 891},
  {'A': 448, 'C': 1558, 'G': 1, 'T': 772},
  {'A': 496, 'C': 1920, 'G': 1, 'T': 695},
  {'A': 745, 'C': 1516, 'G': 1, 'T': 914},
  {'A': 495, 'C': 1500, 'G': 1, 'T': 746},
  {'A': 482, 'C': 1891, 'G': 1, 'T': 714},
  {'A': 578, 'C': 1754, 'G': 1, 'T': 787},
  {'A': 519, 'C': 1575, 'G': 1, 'T': 609},
  {'A': 479, 'C': 1743, 'G': 1, 'T': 715},
  {'A': 666, 'C': 1584, 'G': 1, 'T': 764},
  {'A': 496, 'C': 1496, 'G': 1, 'T': 757},
  {'A': 512, 'C': 1753, 'G': 1, 'T': 699},
  {'A': 619, 'C': 1648, 'G': 1, 'T': 735},
  {'A': 621, 'C': 1457, 'G': 1, 'T': 696},
  {'A': 567, 'C': 1684, 'G': 1, 'T': 687},
  {'A': 589, 'C': 1566, 'G': 1, 'T': 784},
  {'A': 509, 'C': 1448, 'G': 1, 'T': 750},
  {'A': 561, 'C': 1560, 'G': 1, 'T': 618},
  {'A': 754, 'C': 1457, 'G': 1, 'T': 675},
  {'A': 583, 'C': 1403, 'G': 1, 'T': 766},
  {'A': 606, 'C': 1472, 'G': 1, 'T': 693},
  {'A': 714, 'C': 1512, 'G': 1, 'T': 686},
  {'A': 637, 'C': 1391, 'G': 1, 'T': 809},
  {'A': 645, 'C': 1344, 'G': 1, 'T': 729},
  {'A': 717, 'C': 1426, 'G': 1, 'T': 621},
  {'A': 689, 'C': 1263, 'G': 1, 'T': 806},
  {'A': 656, 'C': 1293, 'G': 1, 'T': 674},
  {'A': 739, 'C': 1411, 'G': 1, 'T': 497},
  {'A': 713, 'C': 1235, 'G': 1, 'T': 780},
  {'A': 706, 'C': 1210, 'G': 1, 'T': 600},
  {'A': 820, 'C': 1181, 'G': 1, 'T': 540},
  {'A': 667, 'C': 1244, 'G': 1, 'T': 705},
  {'A': 723, 'C': 1064, 'G': 1, 'T': 596},
  {'A': 862, 'C': 1206, 'G': 1, 'T': 472},
  {'A': 716, 'C': 1125, 'G': 1, 'T': 699},
  {'A': 771, 'C': 957, 'G': 1, 'T': 570},
  {'A': 796, 'C': 1086, 'G': 1, 'T': 494},
  {'A': 807, 'C': 984, 'G': 1, 'T': 650},
  {'A': 773, 'C': 870, 'G': 1, 'T': 525},
  {'A': 901, 'C': 961, 'G': 1, 'T': 438},
  {'A': 771, 'C': 942, 'G': 1, 'T': 604},
  {'A': 751, 'C': 823, 'G': 1, 'T': 439},
  {'A': 804, 'C': 922, 'G': 1, 'T': 406},
  {'A': 757, 'C': 912, 'G': 1, 'T': 535},
  {'A': 759, 'C': 763, 'G': 1, 'T': 418},
  {'A': 743, 'C': 842, 'G': 1, 'T': 398},
  {'A': 765, 'C': 766, 'G': 1, 'T': 451},
  {'A': 746, 'C': 643, 'G': 1, 'T': 365},
  {'A': 659, 'C': 736, 'G': 1, 'T': 358},
  {'A': 686, 'C': 699, 'G': 1, 'T': 387},
  {'A': 669, 'C': 588, 'G': 1, 'T': 297},
  {'A': 648, 'C': 678, 'G': 1, 'T': 284},
  {'A': 645, 'C': 624, 'G': 1, 'T': 342},
  {'A': 581, 'C': 518, 'G': 1, 'T': 275},
  {'A': 633, 'C': 568, 'G': 1, 'T': 287},
  {'A': 588, 'C': 486, 'G': 1, 'T': 321},
  {'A': 544, 'C': 449, 'G': 1, 'T': 229},
  {'A': 480, 'C': 483, 'G': 1, 'T': 282},
  {'A': 562, 'C': 445, 'G': 1, 'T': 263},
  {'A': 452, 'C': 435, 'G': 1, 'T': 178},
  {'A': 442, 'C': 420, 'G': 1, 'T': 237},
  {'A': 449, 'C': 392, 'G': 1, 'T': 235},
  {'A': 419, 'C': 348, 'G': 1, 'T': 171},
  {'A': 397, 'C': 377, 'G': 1, 'T': 198},
  {'A': 391, 'C': 366, 'G': 1, 'T': 218},
  {'A': 356, 'C': 330, 'G': 1, 'T': 150},
  {'A': 336, 'C': 333, 'G': 1, 'T': 175},
  {'A': 327, 'C': 269, 'G': 1, 'T': 189},
  {'A': 269, 'C': 265, 'G': 1, 'T': 125},
  {'A': 280, 'C': 260, 'G': 1, 'T': 127},
  {'A': 274, 'C': 256, 'G': 1, 'T': 152},
  {'A': 242, 'C': 261, 'G': 1, 'T': 96},
  {'A': 243, 'C': 219, 'G': 1, 'T': 130},
  {'A': 248, 'C': 220, 'G': 1, 'T': 150},
  {'A': 195, 'C': 275, 'G': 1, 'T': 89},
  {'A': 197, 'C': 265, 'G': 1, 'T': 105},
  {'A': 212, 'C': 226, 'G': 1, 'T': 135},
  {'A': 191, 'C': 222, 'G': 1, 'T': 134},
  {'A': 200, 'C': 195, 'G': 1, 'T': 128},
  {'A': 170, 'C': 218, 'G': 1, 'T': 144},
  {'A': 133, 'C': 223, 'G': 1, 'T': 104},
  {'A': 149, 'C': 215, 'G': 1, 'T': 94},
  {'A': 173, 'C': 183, 'G': 1, 'T': 87},
  {'A': 137, 'C': 199, 'G': 1, 'T': 104},
  {'A': 126, 'C': 173, 'G': 1, 'T': 125},
  {'A': 167, 'C': 171, 'G': 1, 'T': 89},
  {'A': 117, 'C': 212, 'G': 1, 'T': 97},
  {'A': 137, 'C': 175, 'G': 1, 'T': 91},
  {'A': 121, 'C': 173, 'G': 1, 'T': 83},
  {'A': 112, 'C': 174, 'G': 1, 'T': 98},
  {'A': 124, 'C': 152, 'G': 1, 'T': 96},
  {'A': 122, 'C': 152, 'G': 1, 'T': 92},
  {'A': 99, 'C': 188, 'G': 1, 'T': 81},
  {'A': 108, 'C': 144, 'G': 1, 'T': 98},
  {'A': 119, 'C': 150, 'G': 1, 'T': 84},
  {'A': 111, 'C': 150, 'G': 1, 'T': 78},
  {'A': 103, 'C': 136, 'G': 1, 'T': 78},
  {'A': 129, 'C': 118, 'G': 1, 'T': 70},
  {'A': 138, 'C': 127, 'G': 1, 'T': 63},
  {'A': 112, 'C': 150, 'G': 1, 'T': 74},
  {'A': 105, 'C': 115, 'G': 1, 'T': 79},
  {'A': 106, 'C': 142, 'G': 1, 'T': 60},
  {'A': 112, 'C': 138, 'G': 1, 'T': 58},
  {'A': 107, 'C': 126, 'G': 1, 'T': 59},
  {'A': 118, 'C': 104, 'G': 1, 'T': 61},
  {'A': 109, 'C': 103, 'G': 1, 'T': 60},
  {'A': 116, 'C': 104, 'G': 1, 'T': 49},
  {'A': 93, 'C': 111, 'G': 1, 'T': 62},
  {'A': 92, 'C': 112, 'G': 1, 'T': 62},
  {'A': 107, 'C': 98, 'G': 1, 'T': 50},
  {'A': 110, 'C': 123, 'G': 1, 'T': 44},
  {'A': 92, 'C': 90, 'G': 1, 'T': 42},
  {'A': 117, 'C': 76, 'G': 1, 'T': 32},
  {'A': 96, 'C': 78, 'G': 1, 'T': 41},
  {'A': 96, 'C': 74, 'G': 1, 'T': 41},
  {'A': 79, 'C': 77, 'G': 1, 'T': 32},
  {'A': 73, 'C': 81, 'G': 1, 'T': 33},
  {'A': 70, 'C': 81, 'G': 1, 'T': 37},
  {'A': 72, 'C': 64, 'G': 1, 'T': 37},
  {'A': 70, 'C': 64, 'G': 1, 'T': 27},
  {'A': 75, 'C': 50, 'G': 1, 'T': 41},
  {'A': 69, 'C': 41, 'G': 1, 'T': 33},
  {'A': 54, 'C': 55, 'G': 1, 'T': 26},
  {'A': 34, 'C': 50, 'G': 1, 'T': 26},
  {'A': 52, 'C': 49, 'G': 1, 'T': 11},
  {'A': 40, 'C': 46, 'G': 1, 'T': 22},
  {'A': 39, 'C': 41, 'G': 1, 'T': 16},
  {'A': 39, 'C': 37, 'G': 1, 'T': 14},
  {'A': 42, 'C': 32, 'G': 1, 'T': 12},
  {'A': 31, 'C': 20, 'G': 1, 'T': 14},
  {'A': 40, 'C': 15, 'G': 1, 'T': 12},
  {'A': 23, 'C': 27, 'G': 1, 'T': 16},
  {'A': 31, 'C': 23, 'G': 1, 'T': 7},
  {'A': 18, 'C': 25, 'G': 1, 'T': 8},
  {'A': 23, 'C': 23, 'G': 1, 'T': 7},
  {'A': 22, 'C': 15, 'G': 1, 'T': 12},
  {'A': 23, 'C': 18, 'G': 1, 'T': 9},
  {'A': 23, 'C': 12, 'G': 1, 'T': 13},
  {'A': 24, 'C': 13, 'G': 1, 'T': 10},
  {'A': 15, 'C': 15, 'G': 1, 'T': 7},
  {'A': 10, 'C': 15, 'G': 1, 'T': 3},
  {'A': 9, 'C': 14, 'G': 1, 'T': 1},
  {'A': 6, 'C': 5, 'G': 1, 'T': 2},
  {'A': 9, 'C': 6, 'G': 1, 'T': 2},
  {'A': 4, 'C': 3, 'G': 1, 'T': 5},
  {'A': 12, 'C': 4, 'G': 1, 'T': 2},
  {'A': 6, 'C': 5, 'G': 1, 'T': 3},
  {'A': 6, 'C': 4, 'G': 1, 'T': 2},
  {'A': 2, 'C': 7, 'G': 1, 'T': 1},
  {'A': 5, 'C': 5, 'G': 1, 'T': 0},
  {'A': 2, 'C': 1, 'G': 1, 'T': 2},
  {'A': 4, 'C': 1, 'G': 1, 'T': 2},
  {'A': 2, 'C': 3, 'G': 1, 'T': 2},
  {'A': 3, 'C': 3, 'G': 1, 'T': 1},
  {'A': 2, 'C': 2, 'G': 1, 'T': 2},
  {'A': 1, 'C': 2, 'G': 1, 'T': 1},
  {'A': 1, 'C': 2, 'G': 0, 'T': 1},
  {'A': 1, 'C': 1, 'G': 1, 'T': 0},
  {'A': 1, 'C': 1, 'G': 1, 'T': 0},
  {'A': 1, 'C': 0, 'G': 0, 'T': 1},
  {'A': 0, 'C': 1, 'G': 1, 'T': 0},
  {'A': 1, 'C': 1, 'G': 0, 'T': 0},
  {'A': 0, 'C': 0, 'G': 1, 'T': 1},
  {'A': 0, 'C': 1, 'G': 1, 'T': 0},
  {'A': 0, 'C': 0, 'G': 1, 'T': 1},
  {'A': 0, 'C': 1, 'G': 1, 'T': 0},
  {'A': 1, 'C': 0, 'G': 1, 'T': 0},
  {'A': 0, 'C': 1, 'G': 0, 'T': 1},
  {'A': 0, 'C': 0, 'G': 1, 'T': 0},
  {'A': 1, 'C': 0, 'G': 1, 'T': 0},
  {'A': 0, 'C': 1, 'G': 1, 'T': 0},
  {'A': 0, 'C': 0, 'G': 1, 'T': 0},
  {'A': 1, 'C': 1, 'G': 0, 'T': 0},
  {'A': 0, 'C': 0, 'G': 1, 'T': 0},
  {'A': 1, 'C': 1, 'G': 0, 'T': 0},
  {'A': 0, 'C': 0, 'G': 1, 'T': 0},
  {'A': 1, 'C': 0, 'G': 1, 'T': 0},
  {'A': 0, 'C': 0, 'G': 1, 'T': 1},
  {'A': 2, 'C': 0, 'G': 0, 'T': 0},
  {'A': 0, 'C': 1, 'G': 1, 'T': 0},
  {'A': 0, 'C': 1, 'G': 1, 'T': 0},
  {'A': 0, 'C': 0, 'G': 1, 'T': 0},
  {'A': 1, 'C': 0, 'G': 0, 'T': 0},
  {'A': 0, 'C': 0, 'G': 0, 'T': 1},
  {'A': 1, 'C': 0, 'G': 0, 'T': 0},
  {'A': 0, 'C': 1, 'G': 0, 'T': 0},
  {'A': 0, 'C': 0, 'G': 1, 'T': 0}])

In [ ]: