Analysis of the libraries HJJ7JBCX2_ZIKV

Here we plot the major base frequency for different time points. We also detect positions that consistently rise in frequency through time, i.e. positions that rise monotonously in frequency with time. Positions that only rise in frequency between the first and the last time points have not been investigated.


In [1]:
import pandas as pd
import matplotlib.pyplot as plt
import matplotlib.ticker as mticker
from pylab import rcParams
import seaborn as sns
from array import array
import numpy as np
from scipy.stats import ttest_ind
from scipy.stats import linregress
from scipy.stats import mannwhitneyu
%matplotlib inline

Obtaining the sequence annotation


In [2]:
begins=[]
ends=[]
names =[]
with open ("sequence.gb") as f:
    in_pep = False
    for l in f:
        if "mat_peptide" in l:
            begins.append(int(l.split()[1].split("..")[0]))
            ends.append(int(l.split()[1].split("..")[1]))
            in_pep = True
        elif in_pep :
            names.append(l.split("=")[1])
            in_pep = False
            
print(begins)
print(ends)
print(names)


[108, 474, 753, 978, 2490, 3546, 4224, 4614, 6465, 6846, 6915, 7668]
[473, 752, 977, 2489, 3545, 4223, 4613, 6464, 6845, 6914, 7667, 10376]
['"capsid"\n', '"propeptide"\n', '"membrane"\n', '"envelope"\n', '"NS1"\n', '"NS2A"\n', '"NS2B"\n', '"NS3"\n', '"NS4A"\n', '"2K"\n', '"NS4B"\n', '"NS5"\n']

Functions to plot interesting positions and gene boundaries


In [3]:
# Interesting positions
positions=[316,1670,1785,2340,5935,7172,8449,9165]
def plot_positions():
    for x in positions:
        plt.axvline(x=x, linewidth=1, linestyle=':')
        
def plot_genes():
    for i in range(len(begins)):
        plt.plot([begins[i], begins[i]], [0.99,1.0], linewidth=2, linestyle='-', color="black")
        if i%2==0:
            plt.text (begins[i] + ((ends[i] - begins[i])/10), 1.005, (names[i].replace('"', ''))[0:3], size='xx-small')
        else:
            plt.text (begins[i] + ((ends[i] - begins[i])/10), 1.015, (names[i].replace('"', ''))[0:3], size='xx-small')
    plt.plot([ends[-1], ends[-1]], [0.99,1.0], linewidth=2, linestyle='-', color="black")

Function to add useful columns to the tables


In [4]:
def synonymous (row):
    if row['null'] or (row['Consensus_aa']==row['Secondbase_aa'] ):
        return "synonymous" 
    else:
        return "non-synonymous"

def add_columns(table):
    table['null'] = (table['Secondbase_aa']).isnull()
    table['is_synonymous'] = table.apply (lambda row: synonymous (row),axis=1)
    table['1_major_variant_frequency'] = 1.0 - table['Major_variant_frequency_quality_corrected']

Functions to detect variants increasing in frequency


In [5]:
def is_increasing(minor_frequencies):
    #print(minor_frequencies)
    previous = minor_frequencies[0]
    for m in range(1,len(minor_frequencies)):
        if previous < minor_frequencies[m]:
            #print(str(previous) + " < " + str(minor_frequencies[m]))
            previous = minor_frequencies[m]
        else:
            return False
    return True

def get_variant_frequency(variant, table, i):
    sum_of_bases = table['As_quality_corrected'][i]+table['Cs_quality_corrected'][i]+table['Gs_quality_corrected'][i]+table['Ts_quality_corrected'][i]+table['Ns_quality_corrected'][i]
    if variant == "A":
        return table["As_quality_corrected"][i] / sum_of_bases
    elif variant == "C":
        return table["Cs_quality_corrected"][i] / sum_of_bases
    elif variant == "G":
        return table["Gs_quality_corrected"][i] / sum_of_bases
    elif variant == "T":
        return table["Ts_quality_corrected"][i] / sum_of_bases
    else:
        return np.nan
        

def get_increasing_variants(tables):
    num_tables = len(tables)
    first = tables[0]
    last = tables[num_tables-1]
    major = ""
    minor = ""
    major_frequencies = array('d',[0.0]*num_tables)
    minor_frequencies = array('d',[0.0]*num_tables)
    increasingVariants = dict()
    for i in first["Position"]:
        major = first["Major_variant"][i]
        #print(last['Major_variant_frequency_quality_corrected'][i])
        major_frequencies[0] = first['Major_variant_frequency_quality_corrected'][i]
        if major == last["Major_variant"][i]:
            minor = last["Second_variant"][i]
        else:
            minor = last["Major_variant"][i]
        minor_frequencies[0] = get_variant_frequency(minor, first, i)
        for table_id in range(1, num_tables):
            major_frequencies[table_id] = get_variant_frequency(major, tables[table_id], i)
            minor_frequencies[table_id] = get_variant_frequency(minor, tables[table_id], i)
        if is_increasing(minor_frequencies):
            increasingVariants[i] = [major_frequencies.tolist(), minor_frequencies.tolist()]
    return increasingVariants

Reading all data


In [6]:
# Clone at days 12, 15, 19
clone12 = pd.read_csv ("HJJ7JBCX2_ZIKV-s-and-c_18s004258-1-1_DREUX_lane1cloneD12_1_sequence.txt.assembled.fastq_mapped_AA.csv", na_values=" -nan")
clone15 = pd.read_csv ("HJJ7JBCX2_ZIKV-s-and-c_18s004258-1-1_DREUX_lane1cloneD15_1_sequence.txt.assembled.fastq_mapped_AA.csv", na_values=" -nan")
clone19 = pd.read_csv ("HJJ7JBCX2_ZIKV-s-and-c_18s004258-1-1_DREUX_lane1cloneD19_1_sequence.txt.assembled.fastq_mapped_AA.csv", na_values=" -nan")

add_columns(clone12)
add_columns(clone15)
add_columns(clone19)

In [7]:
# Control runs, replicate A
DD18_A = pd.read_csv ("HJJ7JBCX2_ZIKV-s-and-c_18s004258-1-1_DREUX_lane1DD18A_1_sequence.txt.assembled.fastq_mapped_AA.csv", na_values=" -nan")
add_columns(DD18_A)

In [8]:
# Control runs, replicate D
DD18_D = pd.read_csv ("HJJ7JBCX2_ZIKV-s-and-c_18s004258-1-1_DREUX_lane1DD18D_1_sequence.txt.assembled.fastq_mapped_AA.csv")
add_columns(DD18_D)

In [9]:
# Control runs, replicate E
DD3_E = pd.read_csv ("HJJ7JBCX2_ZIKV-s-and-c_18s004258-1-1_DREUX_lane1DD3E_1_sequence.txt.assembled.fastq_mapped_AA.csv")
DD12_E = pd.read_csv ("HJJ7JBCX2_ZIKV-s-and-c_18s004258-1-1_DREUX_lane1DD12E_1_sequence.txt.assembled.fastq_mapped_AA.csv")
DD18_E = pd.read_csv ("HJJ7JBCX2_ZIKV-s-and-c_18s004258-1-1_DREUX_lane1DD18E_1_sequence.txt.assembled.fastq_mapped_AA.csv")
DD24_E = pd.read_csv ("HJJ7JBCX2_ZIKV-s-and-c_18s004258-1-1_DREUX_lane1DD24E_1_sequence.txt.assembled.fastq_mapped_AA.csv")
DD24crude_E = pd.read_csv ("HJJ7JBCX2_ZIKV-s-and-c_18s004258-1-1_DREUX_lane1DD24Ecrude_1_sequence.txt.assembled.fastq_mapped_AA.csv")

add_columns(DD3_E)
add_columns(DD12_E)
add_columns(DD18_E)
add_columns(DD24_E)
add_columns(DD24crude_E)

In [10]:
# TLR3 activation runs, replicate A
TD18_A = pd.read_csv ("HJJ7JBCX2_ZIKV-s-and-c_18s004258-1-1_DREUX_lane1TD18A_1_sequence.txt.assembled.fastq_mapped_AA.csv")
add_columns(TD18_A)

In [11]:
# TLR3 activation runs, replicate D
TD9_D = pd.read_csv ("HJJ7JBCX2_ZIKV-s-and-c_18s004258-1-1_DREUX_lane1TD9D_1_sequence.txt.assembled.fastq_mapped_AA.csv")
TD12_D = pd.read_csv ("HJJ7JBCX2_ZIKV-s-and-c_18s004258-1-1_DREUX_lane1TD12D_1_sequence.txt.assembled.fastq_mapped_AA.csv")
TD18_D = pd.read_csv ("HJJ7JBCX2_ZIKV-s-and-c_18s004258-1-1_DREUX_lane1TD18D_1_sequence.txt.assembled.fastq_mapped_AA.csv")
TD24_D = pd.read_csv ("HJJ7JBCX2_ZIKV-s-and-c_18s004258-1-1_DREUX_lane1TD24D_1_sequence.txt.assembled.fastq_mapped_AA.csv")

add_columns(TD9_D)
add_columns(TD12_D)
add_columns(TD18_D)
add_columns(TD24_D)

In [12]:
# TLR3 activation runs, replicate E
TD9_E = pd.read_csv ("HJJ7JBCX2_ZIKV-s-and-c_18s004258-1-1_DREUX_lane1TD9E_1_sequence.txt.assembled.fastq_mapped_AA.csv")
TD12_E = pd.read_csv ("HJJ7JBCX2_ZIKV-s-and-c_18s004258-1-1_DREUX_lane1TD12E_1_sequence.txt.assembled.fastq_mapped_AA.csv")
TD18_E = pd.read_csv ("HJJ7JBCX2_ZIKV-s-and-c_18s004258-1-1_DREUX_lane1TD18E_1_sequence.txt.assembled.fastq_mapped_AA.csv")
TD24_E = pd.read_csv ("HJJ7JBCX2_ZIKV-s-and-c_18s004258-1-1_DREUX_lane1TD24E_1_sequence.txt.assembled.fastq_mapped_AA.csv")
add_columns(TD9_E)
add_columns(TD12_E)
add_columns(TD18_E)
add_columns(TD24_E)

Analysis of the clone samples

Coverage


In [13]:
variable = 'Coverage'
sample = len(clone12 [variable])*["Clone 12"]+len(clone15 [variable])*["Clone 15"]+len(clone19 [variable])*["Clone 19"]

overlay_table_concat = pd.DataFrame ({'Position':pd.concat([clone12['Position'],clone15['Position'],clone19['Position']]), variable:pd.concat([clone12 [variable],clone15 [variable], clone19 [variable] ]), 'sample':sample})



sns.lmplot( x="Position", y=variable, data=overlay_table_concat, fit_reg=False, hue='sample', legend=False, size=7, aspect=2, lowess=True,scatter_kws={"s": 20})
plt.legend(loc='lower right')
plot_positions()
plot_genes()


Basically, we find the same areas of low and high coverage.

Analysis of the frequency of the major variant


In [14]:
variable = 'Major_variant_frequency'
sample = len(clone12 [variable])*["Clone 12"]+len(clone15 [variable])*["Clone 15"]+len(clone19 [variable])*["Clone 19"]

overlay_table_concat = pd.DataFrame ({'Position':pd.concat([clone12['Position'],clone15['Position'],clone19['Position']]), variable:pd.concat([clone12 [variable],clone15 [variable], clone19 [variable] ]), 'sample':sample})



sns.lmplot( x="Position", y=variable, data=overlay_table_concat, fit_reg=False, hue='sample', legend=False, size=7, aspect=2, lowess=True,scatter_kws={"s": 20})
plt.legend(loc='lower right')
plot_positions()
plot_genes()


Variants that increase in frequency


In [15]:
tables_clone = [clone12, clone15, clone19]
increasing_clone = get_increasing_variants(tables_clone)
print("There are "+str(len(increasing_clone))+" positions that rise in frequency.")
print("Those are:")
print(increasing_clone.keys())


There are 4300 positions that rise in frequency.
Those are:
dict_keys([27, 29, 34, 35, 36, 43, 49, 53, 56, 57, 59, 61, 65, 66, 68, 70, 72, 73, 78, 80, 88, 97, 101, 112, 115, 118, 120, 129, 131, 133, 155, 159, 161, 167, 169, 170, 171, 172, 173, 175, 178, 184, 192, 193, 195, 200, 201, 203, 206, 207, 211, 214, 218, 222, 224, 225, 229, 232, 234, 243, 245, 246, 249, 252, 253, 257, 262, 265, 267, 269, 275, 276, 282, 283, 293, 302, 303, 320, 325, 335, 344, 349, 356, 358, 368, 374, 375, 376, 377, 379, 380, 382, 388, 391, 392, 395, 396, 397, 401, 405, 406, 410, 419, 420, 421, 422, 424, 425, 426, 427, 429, 430, 435, 442, 444, 445, 446, 448, 449, 450, 451, 453, 454, 456, 459, 460, 462, 463, 465, 467, 470, 471, 476, 477, 479, 480, 481, 484, 485, 487, 488, 489, 493, 494, 496, 498, 501, 502, 503, 509, 514, 517, 518, 520, 522, 523, 524, 526, 527, 528, 529, 533, 534, 536, 537, 539, 543, 544, 548, 549, 550, 553, 554, 563, 568, 572, 575, 581, 586, 588, 592, 593, 595, 597, 600, 601, 602, 612, 613, 615, 616, 618, 619, 622, 628, 630, 632, 634, 635, 636, 639, 643, 650, 653, 656, 657, 661, 662, 664, 665, 666, 667, 671, 676, 679, 681, 686, 690, 693, 694, 699, 702, 710, 712, 715, 716, 718, 719, 722, 723, 727, 729, 731, 735, 737, 740, 743, 745, 747, 748, 749, 752, 753, 756, 759, 763, 766, 767, 769, 771, 774, 775, 776, 777, 778, 780, 782, 784, 785, 788, 789, 791, 792, 793, 794, 795, 798, 806, 812, 814, 815, 816, 820, 823, 826, 827, 828, 831, 835, 840, 843, 847, 853, 854, 858, 859, 861, 862, 868, 870, 874, 875, 876, 877, 881, 887, 888, 891, 892, 893, 894, 895, 898, 900, 902, 903, 905, 915, 916, 918, 920, 925, 926, 929, 931, 932, 934, 936, 937, 938, 942, 944, 946, 948, 950, 952, 953, 955, 957, 963, 964, 966, 968, 970, 973, 974, 978, 979, 980, 983, 985, 992, 995, 996, 1004, 1006, 1008, 1013, 1033, 1044, 1047, 1048, 1051, 1054, 1060, 1061, 1064, 1067, 1068, 1069, 1072, 1074, 1077, 1078, 1079, 1080, 1081, 1082, 1085, 1090, 1093, 1094, 1098, 1099, 1100, 1101, 1102, 1103, 1105, 1106, 1109, 1110, 1115, 1116, 1119, 1120, 1122, 1123, 1126, 1127, 1129, 1130, 1131, 1132, 1134, 1139, 1142, 1147, 1148, 1149, 1150, 1151, 1153, 1154, 1155, 1157, 1159, 1160, 1162, 1163, 1164, 1166, 1167, 1171, 1172, 1176, 1177, 1178, 1179, 1182, 1183, 1184, 1185, 1188, 1189, 1190, 1191, 1192, 1193, 1196, 1197, 1198, 1199, 1200, 1203, 1204, 1205, 1206, 1209, 1210, 1213, 1215, 1216, 1217, 1219, 1220, 1224, 1225, 1229, 1230, 1233, 1236, 1237, 1239, 1241, 1247, 1248, 1249, 1251, 1252, 1253, 1254, 1256, 1259, 1260, 1264, 1266, 1269, 1270, 1271, 1274, 1275, 1276, 1282, 1284, 1285, 1287, 1288, 1290, 1291, 1294, 1295, 1296, 1299, 1303, 1305, 1308, 1310, 1315, 1316, 1320, 1321, 1323, 1324, 1326, 1327, 1328, 1330, 1335, 1338, 1339, 1344, 1348, 1352, 1353, 1363, 1365, 1366, 1367, 1371, 1376, 1379, 1383, 1384, 1385, 1386, 1387, 1388, 1391, 1392, 1395, 1397, 1401, 1402, 1405, 1406, 1407, 1408, 1413, 1414, 1416, 1418, 1419, 1420, 1422, 1427, 1430, 1431, 1432, 1437, 1441, 1442, 1447, 1448, 1449, 1450, 1455, 1458, 1462, 1463, 1464, 1466, 1467, 1487, 1489, 1494, 1495, 1497, 1500, 1501, 1503, 1504, 1512, 1514, 1515, 1523, 1524, 1528, 1530, 1531, 1537, 1538, 1540, 1542, 1545, 1546, 1550, 1553, 1554, 1556, 1557, 1560, 1561, 1562, 1563, 1564, 1567, 1568, 1570, 1572, 1576, 1581, 1583, 1585, 1589, 1592, 1593, 1600, 1604, 1605, 1606, 1607, 1608, 1609, 1610, 1614, 1615, 1616, 1617, 1618, 1629, 1630, 1631, 1636, 1640, 1646, 1648, 1650, 1652, 1654, 1662, 1665, 1667, 1668, 1669, 1670, 1674, 1675, 1676, 1679, 1681, 1687, 1689, 1690, 1692, 1694, 1695, 1696, 1698, 1700, 1707, 1708, 1710, 1711, 1719, 1720, 1721, 1722, 1723, 1725, 1726, 1727, 1728, 1730, 1732, 1733, 1734, 1741, 1744, 1749, 1750, 1757, 1761, 1762, 1767, 1770, 1771, 1772, 1775, 1776, 1779, 1780, 1781, 1782, 1785, 1786, 1788, 1790, 1791, 1792, 1793, 1794, 1803, 1805, 1806, 1808, 1811, 1812, 1813, 1814, 1815, 1816, 1817, 1821, 1822, 1823, 1824, 1826, 1829, 1830, 1831, 1832, 1838, 1839, 1841, 1843, 1847, 1849, 1850, 1852, 1853, 1854, 1859, 1860, 1867, 1868, 1869, 1871, 1875, 1876, 1881, 1882, 1887, 1889, 1890, 1891, 1892, 1894, 1895, 1896, 1899, 1902, 1905, 1908, 1909, 1910, 1911, 1912, 1913, 1918, 1920, 1921, 1925, 1926, 1927, 1928, 1929, 1931, 1932, 1936, 1938, 1942, 1944, 1945, 1949, 1950, 1951, 1954, 1956, 1957, 1958, 1959, 1961, 1962, 1967, 1971, 1972, 1973, 1974, 1975, 1977, 1978, 1980, 1983, 1984, 1988, 1989, 1995, 2000, 2001, 2003, 2005, 2006, 2007, 2009, 2013, 2014, 2015, 2016, 2019, 2024, 2029, 2030, 2033, 2034, 2036, 2038, 2040, 2041, 2045, 2047, 2055, 2056, 2058, 2059, 2062, 2063, 2064, 2065, 2069, 2071, 2073, 2074, 2077, 2080, 2081, 2087, 2089, 2091, 2092, 2094, 2095, 2099, 2101, 2102, 2103, 2108, 2109, 2110, 2115, 2116, 2118, 2119, 2120, 2121, 2122, 2126, 2128, 2129, 2130, 2132, 2134, 2142, 2146, 2148, 2153, 2154, 2164, 2167, 2168, 2169, 2171, 2172, 2173, 2177, 2178, 2179, 2183, 2184, 2185, 2186, 2189, 2191, 2192, 2194, 2202, 2208, 2211, 2215, 2217, 2220, 2224, 2226, 2227, 2230, 2231, 2244, 2249, 2250, 2256, 2259, 2265, 2273, 2274, 2276, 2280, 2281, 2284, 2291, 2292, 2301, 2302, 2303, 2306, 2307, 2308, 2311, 2315, 2318, 2319, 2320, 2321, 2328, 2330, 2339, 2345, 2360, 2364, 2365, 2366, 2367, 2369, 2370, 2371, 2377, 2389, 2399, 2402, 2405, 2415, 2417, 2419, 2421, 2422, 2424, 2431, 2436, 2438, 2440, 2441, 2446, 2447, 2448, 2456, 2458, 2459, 2460, 2461, 2463, 2464, 2467, 2468, 2470, 2472, 2476, 2477, 2478, 2479, 2481, 2487, 2489, 2491, 2497, 2499, 2502, 2505, 2509, 2511, 2514, 2515, 2516, 2518, 2524, 2526, 2530, 2533, 2536, 2538, 2544, 2545, 2551, 2555, 2557, 2558, 2560, 2562, 2565, 2566, 2568, 2569, 2572, 2576, 2578, 2581, 2584, 2587, 2589, 2590, 2591, 2593, 2594, 2595, 2596, 2599, 2602, 2603, 2604, 2606, 2607, 2608, 2612, 2613, 2615, 2616, 2622, 2624, 2627, 2628, 2630, 2634, 2635, 2638, 2639, 2647, 2651, 2653, 2658, 2659, 2661, 2662, 2664, 2668, 2669, 2670, 2671, 2682, 2683, 2688, 2695, 2696, 2702, 2703, 2708, 2709, 2711, 2713, 2718, 2719, 2722, 2723, 2726, 2727, 2732, 2741, 2743, 2744, 2748, 2751, 2752, 2759, 2761, 2762, 2764, 2767, 2769, 2771, 2773, 2774, 2775, 2776, 2777, 2780, 2782, 2789, 2796, 2798, 2799, 2801, 2802, 2803, 2807, 2808, 2815, 2816, 2819, 2822, 2823, 2824, 2827, 2830, 2845, 2847, 2850, 2851, 2856, 2858, 2860, 2863, 2864, 2865, 2866, 2868, 2870, 2874, 2875, 2878, 2880, 2881, 2884, 2887, 2888, 2903, 2904, 2906, 2907, 2910, 2911, 2917, 2923, 2925, 2928, 2934, 2940, 2947, 2951, 2952, 2955, 2960, 2964, 2966, 2968, 2973, 2980, 2983, 2984, 2985, 2989, 2991, 3000, 3001, 3005, 3007, 3014, 3015, 3019, 3021, 3030, 3034, 3036, 3037, 3044, 3045, 3047, 3052, 3054, 3059, 3060, 3062, 3066, 3070, 3074, 3085, 3092, 3118, 3122, 3125, 3130, 3131, 3134, 3135, 3142, 3148, 3153, 3162, 3166, 3171, 3174, 3178, 3189, 3190, 3193, 3200, 3201, 3203, 3207, 3209, 3212, 3215, 3218, 3221, 3236, 3237, 3238, 3239, 3242, 3243, 3244, 3245, 3249, 3250, 3256, 3257, 3259, 3265, 3268, 3273, 3274, 3275, 3276, 3279, 3281, 3283, 3286, 3287, 3288, 3289, 3290, 3293, 3295, 3305, 3306, 3308, 3310, 3313, 3314, 3318, 3324, 3326, 3328, 3330, 3336, 3342, 3343, 3344, 3360, 3362, 3366, 3374, 3376, 3377, 3378, 3380, 3382, 3384, 3386, 3387, 3388, 3393, 3394, 3395, 3396, 3400, 3401, 3403, 3408, 3409, 3411, 3412, 3413, 3414, 3424, 3432, 3435, 3436, 3438, 3442, 3443, 3446, 3447, 3449, 3451, 3453, 3456, 3458, 3462, 3466, 3470, 3472, 3474, 3476, 3478, 3479, 3480, 3483, 3487, 3490, 3492, 3496, 3498, 3505, 3507, 3509, 3510, 3513, 3515, 3517, 3520, 3521, 3524, 3528, 3530, 3531, 3537, 3538, 3539, 3540, 3541, 3542, 3547, 3548, 3549, 3551, 3555, 3556, 3557, 3558, 3564, 3566, 3570, 3572, 3573, 3574, 3575, 3576, 3577, 3578, 3581, 3584, 3585, 3586, 3587, 3590, 3591, 3596, 3598, 3599, 3605, 3610, 3612, 3614, 3617, 3618, 3620, 3622, 3624, 3625, 3626, 3627, 3630, 3633, 3634, 3640, 3644, 3645, 3649, 3651, 3660, 3661, 3663, 3665, 3669, 3673, 3675, 3678, 3679, 3680, 3685, 3692, 3693, 3695, 3700, 3702, 3703, 3704, 3708, 3710, 3711, 3713, 3714, 3717, 3720, 3722, 3724, 3725, 3729, 3732, 3736, 3737, 3738, 3739, 3740, 3741, 3743, 3745, 3751, 3752, 3753, 3754, 3755, 3756, 3760, 3761, 3762, 3763, 3766, 3767, 3768, 3769, 3770, 3771, 3772, 3775, 3777, 3778, 3784, 3786, 3787, 3789, 3791, 3793, 3796, 3797, 3800, 3804, 3805, 3806, 3807, 3812, 3816, 3818, 3819, 3823, 3824, 3828, 3829, 3832, 3833, 3834, 3841, 3844, 3846, 3847, 3848, 3849, 3852, 3856, 3858, 3859, 3860, 3862, 3863, 3865, 3873, 3874, 3876, 3880, 3881, 3882, 3883, 3884, 3887, 3891, 3893, 3897, 3898, 3900, 3901, 3903, 3904, 3906, 3909, 3911, 3913, 3914, 3917, 3918, 3919, 3920, 3925, 3926, 3928, 3931, 3936, 3939, 3940, 3942, 3945, 3948, 3949, 3950, 3952, 3957, 3959, 3961, 3962, 3964, 3974, 3975, 3976, 3977, 3978, 3979, 3982, 3984, 3985, 3991, 3992, 3994, 3995, 3997, 3999, 4000, 4002, 4005, 4006, 4007, 4011, 4012, 4014, 4015, 4016, 4019, 4020, 4022, 4024, 4025, 4027, 4029, 4031, 4035, 4036, 4038, 4040, 4042, 4045, 4050, 4053, 4063, 4064, 4066, 4068, 4069, 4071, 4074, 4075, 4077, 4082, 4088, 4090, 4091, 4092, 4093, 4094, 4095, 4096, 4097, 4098, 4100, 4101, 4105, 4107, 4109, 4110, 4111, 4112, 4114, 4115, 4117, 4119, 4122, 4124, 4125, 4126, 4128, 4129, 4131, 4132, 4133, 4134, 4135, 4136, 4138, 4139, 4140, 4141, 4143, 4144, 4145, 4149, 4150, 4151, 4152, 4154, 4155, 4157, 4158, 4159, 4160, 4163, 4166, 4168, 4173, 4174, 4180, 4182, 4183, 4186, 4189, 4190, 4194, 4196, 4198, 4199, 4202, 4203, 4204, 4208, 4212, 4217, 4219, 4220, 4221, 4222, 4223, 4225, 4226, 4229, 4230, 4231, 4236, 4237, 4240, 4241, 4243, 4244, 4245, 4246, 4247, 4249, 4250, 4251, 4252, 4253, 4254, 4258, 4259, 4263, 4264, 4266, 4268, 4271, 4273, 4274, 4275, 4278, 4279, 4281, 4285, 4287, 4289, 4290, 4291, 4293, 4296, 4297, 4299, 4304, 4308, 4309, 4310, 4311, 4312, 4314, 4316, 4317, 4320, 4321, 4323, 4325, 4327, 4329, 4331, 4334, 4339, 4342, 4344, 4348, 4351, 4354, 4356, 4357, 4360, 4363, 4366, 4370, 4372, 4373, 4377, 4378, 4379, 4388, 4392, 4393, 4396, 4399, 4400, 4416, 4419, 4421, 4422, 4423, 4424, 4425, 4426, 4428, 4429, 4433, 4435, 4437, 4438, 4439, 4440, 4443, 4444, 4447, 4454, 4456, 4459, 4461, 4468, 4475, 4476, 4477, 4478, 4480, 4484, 4487, 4489, 4494, 4495, 4496, 4497, 4498, 4499, 4500, 4501, 4503, 4506, 4508, 4510, 4513, 4515, 4517, 4518, 4519, 4523, 4524, 4525, 4527, 4528, 4529, 4532, 4534, 4535, 4539, 4540, 4541, 4544, 4546, 4548, 4549, 4551, 4553, 4557, 4558, 4560, 4564, 4565, 4567, 4568, 4569, 4571, 4574, 4578, 4579, 4580, 4581, 4582, 4583, 4585, 4586, 4588, 4594, 4595, 4596, 4597, 4598, 4599, 4601, 4603, 4604, 4606, 4611, 4612, 4615, 4620, 4621, 4622, 4624, 4625, 4628, 4633, 4634, 4637, 4639, 4640, 4641, 4642, 4649, 4662, 4663, 4666, 4667, 4672, 4678, 4681, 4682, 4685, 4688, 4694, 4697, 4700, 4703, 4708, 4709, 4710, 4712, 4713, 4715, 4717, 4719, 4721, 4722, 4723, 4731, 4732, 4735, 4736, 4737, 4738, 4739, 4740, 4744, 4747, 4753, 4757, 4759, 4762, 4764, 4765, 4768, 4770, 4772, 4775, 4776, 4779, 4780, 4784, 4791, 4792, 4794, 4795, 4796, 4807, 4809, 4810, 4812, 4817, 4818, 4820, 4823, 4825, 4826, 4831, 4832, 4836, 4837, 4838, 4839, 4840, 4842, 4843, 4849, 4850, 4862, 4863, 4869, 4872, 4873, 4876, 4878, 4879, 4880, 4882, 4884, 4885, 4887, 4888, 4891, 4893, 4909, 4913, 4914, 4918, 4923, 4929, 4930, 4931, 4936, 4938, 4939, 4944, 4945, 4946, 4949, 4950, 4951, 4956, 4958, 4960, 4961, 4972, 4978, 4981, 4991, 4994, 4998, 5003, 5004, 5007, 5011, 5013, 5017, 5020, 5024, 5025, 5026, 5029, 5030, 5035, 5036, 5037, 5039, 5041, 5043, 5044, 5045, 5047, 5051, 5053, 5055, 5056, 5057, 5061, 5064, 5068, 5072, 5077, 5078, 5080, 5083, 5084, 5086, 5091, 5093, 5101, 5103, 5105, 5108, 5111, 5118, 5119, 5123, 5126, 5127, 5129, 5134, 5137, 5138, 5143, 5146, 5147, 5148, 5149, 5150, 5151, 5154, 5159, 5161, 5162, 5163, 5164, 5165, 5168, 5171, 5172, 5174, 5176, 5177, 5182, 5183, 5188, 5190, 5192, 5193, 5195, 5196, 5198, 5199, 5200, 5202, 5203, 5205, 5212, 5213, 5214, 5215, 5218, 5224, 5225, 5226, 5227, 5228, 5229, 5231, 5233, 5234, 5236, 5237, 5239, 5240, 5242, 5243, 5244, 5246, 5247, 5253, 5255, 5257, 5260, 5261, 5263, 5264, 5275, 5280, 5282, 5285, 5286, 5291, 5293, 5294, 5295, 5296, 5297, 5298, 5299, 5300, 5302, 5304, 5310, 5312, 5313, 5316, 5317, 5318, 5323, 5327, 5330, 5332, 5336, 5338, 5341, 5342, 5346, 5348, 5349, 5351, 5352, 5353, 5356, 5359, 5361, 5362, 5363, 5364, 5365, 5366, 5367, 5368, 5369, 5370, 5374, 5375, 5376, 5379, 5381, 5383, 5385, 5386, 5389, 5390, 5392, 5393, 5394, 5395, 5396, 5397, 5398, 5399, 5401, 5403, 5404, 5405, 5407, 5410, 5412, 5413, 5415, 5416, 5420, 5423, 5424, 5425, 5427, 5428, 5429, 5430, 5432, 5434, 5435, 5436, 5439, 5440, 5441, 5443, 5444, 5445, 5446, 5449, 5452, 5453, 5462, 5463, 5464, 5466, 5468, 5470, 5473, 5474, 5476, 5478, 5479, 5481, 5483, 5485, 5486, 5487, 5489, 5497, 5500, 5502, 5511, 5512, 5513, 5517, 5521, 5522, 5526, 5530, 5537, 5540, 5542, 5546, 5550, 5554, 5561, 5564, 5565, 5568, 5570, 5571, 5574, 5578, 5581, 5582, 5584, 5587, 5589, 5592, 5594, 5599, 5603, 5604, 5606, 5607, 5609, 5611, 5612, 5616, 5618, 5619, 5620, 5621, 5622, 5623, 5626, 5628, 5632, 5633, 5634, 5635, 5637, 5640, 5641, 5643, 5644, 5646, 5648, 5650, 5651, 5652, 5656, 5658, 5659, 5661, 5662, 5670, 5671, 5673, 5676, 5677, 5678, 5679, 5681, 5683, 5684, 5686, 5687, 5695, 5702, 5703, 5704, 5707, 5714, 5718, 5721, 5725, 5728, 5730, 5731, 5732, 5733, 5735, 5738, 5740, 5742, 5748, 5759, 5760, 5764, 5765, 5769, 5770, 5773, 5775, 5778, 5779, 5782, 5786, 5787, 5790, 5791, 5796, 5797, 5798, 5799, 5808, 5809, 5810, 5813, 5820, 5824, 5827, 5830, 5834, 5835, 5839, 5841, 5842, 5843, 5844, 5845, 5847, 5849, 5850, 5851, 5857, 5858, 5860, 5863, 5866, 5871, 5872, 5875, 5876, 5877, 5880, 5881, 5883, 5888, 5889, 5890, 5893, 5898, 5900, 5905, 5906, 5907, 5911, 5913, 5915, 5917, 5918, 5925, 5927, 5928, 5933, 5936, 5938, 5939, 5940, 5942, 5944, 5946, 5947, 5948, 5950, 5951, 5952, 5953, 5954, 5955, 5956, 5958, 5960, 5961, 5962, 5963, 5964, 5965, 5968, 5970, 5974, 5979, 5986, 5990, 5994, 5997, 6002, 6003, 6004, 6009, 6018, 6019, 6025, 6026, 6028, 6035, 6040, 6043, 6044, 6045, 6047, 6048, 6049, 6053, 6054, 6059, 6062, 6066, 6069, 6074, 6075, 6076, 6078, 6080, 6081, 6086, 6090, 6091, 6093, 6096, 6097, 6098, 6099, 6100, 6102, 6103, 6105, 6106, 6107, 6108, 6111, 6113, 6118, 6120, 6122, 6126, 6129, 6132, 6133, 6136, 6137, 6143, 6144, 6146, 6149, 6151, 6152, 6153, 6157, 6159, 6161, 6162, 6163, 6165, 6167, 6168, 6171, 6173, 6174, 6176, 6177, 6178, 6179, 6185, 6187, 6189, 6192, 6194, 6200, 6204, 6206, 6207, 6208, 6209, 6215, 6216, 6217, 6218, 6219, 6220, 6221, 6222, 6223, 6225, 6226, 6227, 6230, 6233, 6234, 6235, 6236, 6238, 6243, 6245, 6250, 6251, 6253, 6257, 6260, 6261, 6263, 6264, 6267, 6268, 6271, 6277, 6282, 6283, 6289, 6292, 6293, 6294, 6305, 6306, 6307, 6308, 6312, 6313, 6316, 6319, 6321, 6323, 6331, 6333, 6336, 6341, 6342, 6343, 6347, 6350, 6352, 6353, 6356, 6358, 6362, 6364, 6365, 6367, 6376, 6377, 6382, 6386, 6387, 6391, 6397, 6399, 6400, 6402, 6405, 6406, 6407, 6408, 6409, 6411, 6413, 6414, 6419, 6420, 6422, 6423, 6426, 6427, 6433, 6434, 6435, 6439, 6440, 6445, 6448, 6450, 6451, 6453, 6460, 6467, 6470, 6473, 6479, 6486, 6489, 6491, 6496, 6500, 6501, 6504, 6511, 6512, 6516, 6517, 6524, 6526, 6527, 6528, 6529, 6535, 6536, 6541, 6542, 6543, 6544, 6545, 6547, 6549, 6554, 6556, 6557, 6558, 6560, 6562, 6564, 6567, 6570, 6573, 6577, 6579, 6580, 6588, 6589, 6590, 6592, 6593, 6594, 6597, 6598, 6599, 6600, 6601, 6602, 6608, 6609, 6610, 6611, 6612, 6613, 6615, 6616, 6617, 6618, 6622, 6623, 6624, 6627, 6630, 6636, 6641, 6642, 6646, 6648, 6652, 6653, 6655, 6656, 6657, 6658, 6659, 6664, 6665, 6666, 6667, 6669, 6673, 6676, 6678, 6679, 6680, 6687, 6688, 6689, 6700, 6710, 6713, 6715, 6716, 6719, 6723, 6725, 6728, 6729, 6733, 6738, 6739, 6741, 6743, 6745, 6750, 6751, 6752, 6753, 6754, 6755, 6756, 6757, 6758, 6760, 6763, 6764, 6767, 6768, 6770, 6772, 6774, 6776, 6779, 6780, 6784, 6785, 6787, 6788, 6792, 6797, 6802, 6803, 6805, 6806, 6810, 6814, 6817, 6818, 6820, 6821, 6822, 6824, 6825, 6826, 6830, 6833, 6835, 6837, 6838, 6839, 6840, 6841, 6842, 6844, 6845, 6846, 6847, 6850, 6852, 6859, 6860, 6861, 6862, 6863, 6867, 6868, 6870, 6871, 6872, 6874, 6876, 6888, 6890, 6892, 6893, 6894, 6896, 6897, 6901, 6903, 6907, 6909, 6910, 6911, 6912, 6913, 6917, 6919, 6920, 6922, 6923, 6926, 6927, 6928, 6929, 6930, 6932, 6935, 6937, 6938, 6939, 6940, 6941, 6942, 6946, 6949, 6950, 6951, 6955, 6956, 6958, 6959, 6961, 6962, 6963, 6967, 6968, 6972, 6974, 6975, 6979, 6984, 6986, 6987, 6988, 6989, 6990, 6992, 6993, 6995, 6996, 6997, 6999, 7000, 7002, 7004, 7005, 7006, 7012, 7013, 7015, 7016, 7020, 7021, 7022, 7023, 7024, 7025, 7029, 7031, 7033, 7035, 7036, 7037, 7038, 7039, 7040, 7042, 7044, 7045, 7046, 7047, 7048, 7052, 7053, 7055, 7056, 7059, 7060, 7062, 7063, 7065, 7066, 7067, 7070, 7071, 7072, 7074, 7076, 7079, 7080, 7082, 7083, 7090, 7092, 7099, 7101, 7103, 7107, 7109, 7110, 7111, 7113, 7115, 7116, 7118, 7119, 7121, 7123, 7124, 7127, 7128, 7130, 7132, 7134, 7136, 7137, 7138, 7139, 7140, 7141, 7142, 7144, 7145, 7147, 7151, 7152, 7156, 7157, 7158, 7161, 7166, 7167, 7168, 7169, 7171, 7172, 7175, 7176, 7177, 7178, 7179, 7183, 7184, 7186, 7189, 7190, 7191, 7192, 7193, 7194, 7195, 7199, 7203, 7204, 7205, 7206, 7209, 7210, 7213, 7215, 7217, 7218, 7219, 7220, 7225, 7227, 7229, 7230, 7231, 7232, 7233, 7236, 7237, 7238, 7240, 7246, 7247, 7249, 7250, 7251, 7252, 7253, 7256, 7261, 7265, 7266, 7268, 7270, 7273, 7274, 7278, 7279, 7281, 7282, 7286, 7287, 7288, 7290, 7293, 7294, 7295, 7296, 7299, 7301, 7304, 7308, 7313, 7314, 7317, 7318, 7319, 7325, 7329, 7331, 7335, 7339, 7342, 7343, 7346, 7349, 7351, 7353, 7356, 7357, 7358, 7366, 7371, 7377, 7378, 7381, 7383, 7384, 7385, 7387, 7388, 7397, 7403, 7404, 7406, 7410, 7411, 7412, 7413, 7417, 7420, 7421, 7423, 7425, 7427, 7430, 7432, 7434, 7436, 7437, 7439, 7442, 7443, 7444, 7449, 7450, 7451, 7453, 7455, 7456, 7459, 7460, 7461, 7462, 7466, 7467, 7470, 7471, 7472, 7476, 7477, 7479, 7480, 7485, 7487, 7490, 7493, 7495, 7496, 7497, 7498, 7500, 7502, 7503, 7504, 7505, 7506, 7510, 7512, 7514, 7515, 7516, 7521, 7523, 7524, 7525, 7527, 7528, 7532, 7536, 7537, 7538, 7540, 7541, 7542, 7543, 7544, 7545, 7549, 7551, 7555, 7558, 7561, 7563, 7564, 7567, 7568, 7569, 7570, 7572, 7575, 7576, 7577, 7578, 7580, 7581, 7586, 7588, 7590, 7606, 7607, 7612, 7613, 7616, 7617, 7618, 7620, 7621, 7624, 7625, 7626, 7627, 7628, 7630, 7632, 7633, 7634, 7637, 7638, 7639, 7640, 7641, 7643, 7644, 7645, 7647, 7651, 7652, 7653, 7656, 7657, 7658, 7661, 7664, 7666, 7672, 7677, 7679, 7680, 7681, 7687, 7688, 7693, 7696, 7700, 7708, 7709, 7711, 7712, 7717, 7718, 7719, 7724, 7725, 7726, 7728, 7730, 7734, 7737, 7741, 7743, 7744, 7747, 7748, 7750, 7754, 7755, 7758, 7759, 7762, 7763, 7764, 7765, 7766, 7768, 7772, 7774, 7785, 7786, 7787, 7789, 7790, 7791, 7792, 7793, 7794, 7798, 7799, 7804, 7806, 7807, 7808, 7812, 7813, 7815, 7819, 7821, 7822, 7823, 7824, 7827, 7833, 7834, 7835, 7836, 7842, 7844, 7845, 7846, 7850, 7856, 7860, 7862, 7864, 7865, 7867, 7868, 7873, 7877, 7878, 7880, 7882, 7883, 7884, 7885, 7887, 7891, 7895, 7897, 7900, 7905, 7909, 7914, 7915, 7918, 7924, 7926, 7927, 7928, 7929, 7931, 7933, 7936, 7938, 7939, 7941, 7942, 7944, 7947, 7949, 7952, 7955, 7957, 7958, 7959, 7960, 7962, 7963, 7975, 7976, 7979, 7986, 7987, 7988, 7990, 7991, 7994, 7996, 7998, 8003, 8004, 8007, 8008, 8009, 8010, 8014, 8015, 8016, 8018, 8020, 8022, 8027, 8032, 8036, 8039, 8040, 8041, 8042, 8044, 8045, 8046, 8050, 8059, 8062, 8063, 8066, 8067, 8069, 8071, 8075, 8076, 8079, 8080, 8081, 8082, 8084, 8085, 8086, 8087, 8089, 8091, 8092, 8100, 8104, 8105, 8106, 8108, 8109, 8119, 8120, 8121, 8122, 8127, 8128, 8129, 8133, 8134, 8136, 8139, 8141, 8144, 8145, 8146, 8147, 8148, 8150, 8151, 8152, 8155, 8157, 8158, 8159, 8161, 8162, 8163, 8164, 8168, 8169, 8175, 8178, 8179, 8180, 8182, 8183, 8184, 8188, 8189, 8192, 8193, 8195, 8196, 8197, 8198, 8199, 8200, 8201, 8202, 8209, 8210, 8211, 8212, 8213, 8216, 8221, 8222, 8227, 8229, 8230, 8232, 8234, 8235, 8239, 8247, 8248, 8253, 8257, 8258, 8260, 8261, 8264, 8268, 8276, 8278, 8282, 8287, 8292, 8293, 8295, 8297, 8298, 8299, 8301, 8302, 8304, 8305, 8309, 8310, 8311, 8312, 8313, 8315, 8316, 8322, 8323, 8326, 8330, 8332, 8334, 8336, 8340, 8347, 8350, 8352, 8353, 8356, 8358, 8359, 8361, 8362, 8365, 8366, 8367, 8368, 8369, 8370, 8371, 8373, 8374, 8377, 8378, 8381, 8382, 8383, 8384, 8387, 8388, 8391, 8392, 8393, 8395, 8397, 8402, 8404, 8405, 8406, 8407, 8409, 8411, 8415, 8416, 8417, 8418, 8420, 8422, 8428, 8430, 8431, 8433, 8435, 8441, 8442, 8443, 8444, 8448, 8451, 8454, 8456, 8457, 8459, 8460, 8461, 8466, 8470, 8473, 8475, 8476, 8477, 8479, 8481, 8483, 8485, 8486, 8487, 8488, 8492, 8494, 8497, 8506, 8507, 8509, 8511, 8514, 8515, 8517, 8521, 8522, 8524, 8531, 8532, 8533, 8535, 8540, 8541, 8545, 8546, 8547, 8551, 8557, 8560, 8561, 8563, 8564, 8565, 8570, 8574, 8576, 8578, 8580, 8582, 8583, 8584, 8585, 8587, 8593, 8596, 8597, 8599, 8602, 8606, 8609, 8610, 8611, 8613, 8614, 8616, 8620, 8621, 8622, 8623, 8624, 8626, 8627, 8631, 8632, 8634, 8635, 8638, 8639, 8641, 8642, 8644, 8648, 8650, 8654, 8655, 8656, 8658, 8662, 8663, 8669, 8674, 8676, 8680, 8682, 8683, 8684, 8689, 8694, 8695, 8696, 8697, 8698, 8699, 8700, 8701, 8703, 8704, 8706, 8707, 8708, 8709, 8711, 8712, 8715, 8718, 8719, 8732, 8734, 8735, 8736, 8741, 8745, 8751, 8755, 8758, 8759, 8761, 8764, 8766, 8767, 8768, 8778, 8779, 8780, 8781, 8782, 8783, 8790, 8794, 8799, 8800, 8802, 8805, 8806, 8809, 8810, 8815, 8817, 8821, 8822, 8823, 8827, 8830, 8833, 8841, 8842, 8848, 8849, 8850, 8851, 8852, 8853, 8854, 8855, 8859, 8862, 8865, 8867, 8868, 8872, 8892, 8901, 8908, 8911, 8912, 8920, 8931, 8935, 8966, 8975, 8982, 8988, 8990, 8995, 9000, 9003, 9008, 9010, 9027, 9028, 9035, 9040, 9042, 9043, 9045, 9052, 9054, 9058, 9059, 9067, 9072, 9075, 9084, 9085, 9086, 9087, 9088, 9091, 9092, 9097, 9101, 9107, 9109, 9111, 9112, 9116, 9118, 9119, 9120, 9121, 9124, 9126, 9127, 9130, 9131, 9132, 9134, 9142, 9149, 9150, 9155, 9157, 9158, 9161, 9167, 9168, 9169, 9173, 9174, 9175, 9176, 9177, 9178, 9181, 9185, 9187, 9190, 9197, 9206, 9214, 9215, 9221, 9223, 9229, 9243, 9245, 9247, 9250, 9254, 9258, 9261, 9264, 9267, 9273, 9276, 9280, 9285, 9286, 9292, 9296, 9298, 9301, 9306, 9307, 9308, 9309, 9317, 9321, 9323, 9324, 9327, 9328, 9330, 9332, 9333, 9334, 9339, 9348, 9353, 9354, 9359, 9362, 9364, 9371, 9373, 9375, 9381, 9384, 9388, 9389, 9390, 9391, 9396, 9399, 9402, 9403, 9408, 9412, 9414, 9417, 9420, 9423, 9424, 9425, 9427, 9432, 9433, 9445, 9448, 9451, 9452, 9453, 9455, 9459, 9460, 9462, 9466, 9469, 9475, 9477, 9479, 9480, 9481, 9483, 9486, 9487, 9488, 9489, 9492, 9497, 9501, 9502, 9504, 9510, 9511, 9515, 9516, 9519, 9520, 9526, 9527, 9528, 9529, 9530, 9533, 9538, 9540, 9542, 9549, 9563, 9566, 9570, 9571, 9572, 9573, 9575, 9585, 9590, 9591, 9603, 9609, 9614, 9619, 9622, 9624, 9632, 9633, 9634, 9635, 9636, 9648, 9650, 9666, 9667, 9669, 9672, 9677, 9679, 9684, 9693, 9694, 9696, 9698, 9701, 9702, 9711, 9712, 9714, 9715, 9718, 9721, 9732, 9744, 9747, 9752, 9758, 9769, 9775, 9778, 9783, 9785, 9786, 9789, 9790, 9792, 9794, 9796, 9799, 9803, 9805, 9810, 9811, 9813, 9814, 9818, 9819, 9820, 9821, 9822, 9834, 9835, 9836, 9837, 9838, 9839, 9841, 9843, 9844, 9847, 9849, 9851, 9852, 9856, 9858, 9859, 9861, 9862, 9865, 9866, 9868, 9869, 9870, 9872, 9874, 9876, 9877, 9878, 9879, 9881, 9883, 9886, 9887, 9888, 9889, 9890, 9892, 9893, 9894, 9895, 9896, 9897, 9899, 9900, 9901, 9902, 9905, 9907, 9909, 9911, 9915, 9916, 9917, 9921, 9923, 9924, 9925, 9927, 9928, 9930, 9933, 9936, 9941, 9942, 9945, 9947, 9949, 9950, 9953, 9954, 9955, 9957, 9959, 9961, 9962, 9963, 9964, 9965, 9967, 9970, 9974, 9976, 9977, 9978, 9979, 9982, 9988, 9991, 9992, 9993, 9996, 9997, 10000, 10004, 10005, 10006, 10012, 10014, 10017, 10020, 10022, 10023, 10024, 10030, 10032, 10038, 10039, 10040, 10041, 10046, 10050, 10052, 10053, 10057, 10058, 10059, 10063, 10064, 10065, 10069, 10078, 10082, 10086, 10087, 10089, 10092, 10095, 10096, 10098, 10099, 10100, 10109, 10115, 10119, 10120, 10121, 10123, 10124, 10125, 10131, 10138, 10139, 10141, 10142, 10143, 10144, 10145, 10150, 10156, 10157, 10158, 10164, 10187, 10189, 10192, 10204, 10208, 10211, 10213, 10215, 10216, 10218, 10220, 10221, 10222, 10223, 10224, 10225, 10230, 10235, 10236, 10239, 10240, 10242, 10244, 10252, 10254, 10256, 10258, 10261, 10262, 10263, 10269, 10270, 10272, 10274, 10276, 10278, 10279, 10281, 10283, 10298, 10300, 10303, 10305, 10308, 10309, 10313, 10318, 10322, 10323, 10325, 10326, 10327, 10328, 10330, 10336, 10339, 10341, 10342, 10346, 10347, 10348, 10349, 10351, 10353, 10356, 10357, 10359, 10360, 10363, 10364, 10365, 10366, 10367, 10374, 10377, 10382, 10384, 10386, 10387, 10388, 10389, 10390, 10391, 10392, 10397, 10398, 10402, 10405, 10411, 10414, 10415, 10420, 10425, 10426, 10427, 10428, 10429, 10430, 10431, 10433, 10435, 10436, 10437, 10439, 10440, 10445, 10446, 10447, 10452, 10461, 10475, 10478, 10479, 10480, 10481, 10482, 10483, 10484, 10485, 10486, 10487, 10490, 10491, 10492, 10493, 10494, 10495, 10496, 10497, 10498, 10499, 10500, 10501, 10502, 10503, 10510, 10512, 10513, 10515, 10517, 10520, 10521, 10523, 10524, 10529, 10531, 10532, 10533, 10535, 10536, 10538, 10539, 10541, 10542, 10544, 10547, 10548, 10550, 10551, 10553, 10554, 10555, 10556, 10558, 10559, 10560, 10562, 10563, 10564, 10566, 10568, 10569, 10571, 10572, 10573, 10574, 10575, 10577, 10581, 10583, 10588, 10590, 10591, 10594, 10595, 10598, 10599, 10600, 10601, 10603, 10604, 10605, 10606, 10607, 10608, 10609, 10611, 10612, 10613, 10615, 10616, 10617, 10618, 10619, 10620, 10621, 10622, 10623, 10628, 10629, 10631, 10635, 10637, 10639, 10644, 10646, 10647, 10649, 10650, 10651, 10652, 10653, 10654, 10655, 10657, 10658, 10659, 10660, 10661, 10662, 10663, 10664, 10668, 10669, 10679, 10680, 10681, 10682, 10685, 10689, 10690, 10692, 10693, 10694, 10701, 10702, 10706, 10707, 10708, 10710, 10713, 10716, 10717, 10718, 10719, 10721, 10724, 10728, 10744, 10747, 10758, 10761, 10767])
/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:20: RuntimeWarning: invalid value encountered in double_scalars
/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:22: RuntimeWarning: invalid value encountered in double_scalars
/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:18: RuntimeWarning: invalid value encountered in double_scalars

Analysis of control runs

Coverage


In [16]:
variable = 'Coverage'
sample = len(DD18_A [variable])*["DD18_A"]+len(DD18_D [variable])*["DD18_D"]+len(DD3_E [variable])*["DD3_E"]+len(DD12_E [variable])*["DD12_E"]+len(DD18_E [variable])*["DD18_E"]+len(DD24_E [variable])*["DD24_E"]+len(DD24crude_E [variable])*["DD24crude_E"]

overlay_table_concat = pd.DataFrame ({'Position':pd.concat([DD18_A['Position'],DD18_D['Position'],DD3_E['Position'],DD12_E['Position'],DD18_E['Position'],DD24_E['Position'],DD24crude_E['Position']]), variable:pd.concat([DD18_A [variable], DD18_D [variable], DD3_E [variable], DD12_E [variable], DD18_E [variable], DD24_E [variable], DD24crude_E [variable] ]), 'sample':sample})



sns.lmplot( x="Position", y=variable, data=overlay_table_concat, fit_reg=False, hue='sample', legend=False, size=7, aspect=2, lowess=True,scatter_kws={"s": 20})
plt.legend(loc='lower right')
plot_positions()
plot_genes()


Analysis of the frequency of the major variant


In [17]:
variable = 'Major_variant_frequency'
sample = len(DD18_A [variable])*["DD18_A"]+len(DD18_D [variable])*["DD18_D"]+len(DD3_E [variable])*["DD3_E"]+len(DD12_E [variable])*["DD12_E"]+len(DD18_E [variable])*["DD18_E"]+len(DD24_E [variable])*["DD24_E"]+len(DD24crude_E [variable])*["DD24crude_E"]

overlay_table_concat = pd.DataFrame ({'Position':pd.concat([DD18_A['Position'],DD18_D['Position'],DD3_E['Position'],DD12_E['Position'],DD18_E['Position'],DD24_E['Position'],DD24crude_E['Position']]), variable:pd.concat([DD18_A [variable], DD18_D [variable], DD3_E [variable], DD12_E [variable], DD18_E [variable], DD24_E [variable], DD24crude_E [variable] ]), 'sample':sample})



sns.lmplot( x="Position", y=variable, data=overlay_table_concat, fit_reg=False, hue='sample', legend=False, size=7, aspect=2, lowess=True,scatter_kws={"s": 20})
plt.legend(loc='lower right')
plot_positions()
plot_genes()


Focusing on DD*_E


In [18]:
variable = 'Major_variant_frequency'
sample = len(DD3_E [variable])*["DD3_E"]+len(DD12_E [variable])*["DD12_E"]+len(DD18_E [variable])*["DD18_E"]+len(DD24_E [variable])*["DD24_E"]+len(DD24crude_E [variable])*["DD24crude_E"]

overlay_table_concat = pd.DataFrame ({'Position':pd.concat([DD3_E['Position'],DD12_E['Position'],DD18_E['Position'],DD24_E['Position'],DD24crude_E['Position']]), variable:pd.concat([DD3_E [variable], DD12_E [variable], DD18_E [variable], DD24_E [variable], DD24crude_E [variable] ]), 'sample':sample})



sns.lmplot( x="Position", y=variable, data=overlay_table_concat, fit_reg=False, hue='sample', legend=False, size=7, aspect=2, lowess=True,scatter_kws={"s": 20})
plt.legend(loc='lower right')
plot_positions()
plot_genes()


Variants that increase in frequency in DD*_E


In [19]:
tables_control = [DD3_E, DD12_E, DD18_E, DD24_E]
increasing_control = get_increasing_variants(tables_control)
print("There are "+str(len(increasing_control))+" positions that rise in frequency.")
print("Those are:")
print(increasing_control.keys())


There are 468 positions that rise in frequency.
Those are:
dict_keys([31, 43, 47, 53, 72, 74, 98, 100, 109, 187, 188, 200, 209, 216, 235, 240, 250, 270, 281, 302, 304, 320, 334, 367, 376, 384, 396, 430, 433, 499, 503, 518, 533, 534, 550, 561, 626, 643, 646, 647, 685, 696, 701, 709, 746, 801, 816, 820, 831, 855, 874, 878, 899, 903, 919, 965, 969, 1020, 1131, 1194, 1201, 1208, 1267, 1279, 1300, 1305, 1329, 1332, 1341, 1359, 1409, 1424, 1439, 1455, 1460, 1466, 1512, 1525, 1546, 1602, 1660, 1667, 1670, 1707, 1717, 1744, 1751, 1763, 1783, 1822, 1844, 1963, 1964, 1967, 1969, 1986, 2032, 2037, 2046, 2052, 2108, 2110, 2137, 2141, 2145, 2151, 2153, 2155, 2156, 2193, 2307, 2332, 2406, 2412, 2450, 2482, 2493, 2521, 2565, 2597, 2663, 2672, 2674, 2679, 2692, 2694, 2726, 2769, 2782, 2797, 2814, 2865, 2893, 2911, 2912, 2944, 2952, 2974, 3013, 3051, 3061, 3116, 3136, 3187, 3209, 3212, 3224, 3227, 3273, 3307, 3322, 3347, 3390, 3394, 3405, 3431, 3446, 3462, 3521, 3566, 3590, 3607, 3655, 3681, 3689, 3695, 3724, 3744, 3787, 3828, 3864, 3895, 3965, 4000, 4011, 4018, 4032, 4077, 4078, 4102, 4214, 4244, 4271, 4276, 4286, 4289, 4336, 4351, 4354, 4358, 4370, 4393, 4397, 4405, 4416, 4504, 4568, 4657, 4659, 4680, 4728, 4733, 4793, 4812, 4820, 4878, 4881, 4945, 4946, 4951, 4984, 4992, 5007, 5008, 5010, 5015, 5034, 5035, 5075, 5081, 5084, 5086, 5114, 5116, 5131, 5147, 5171, 5190, 5205, 5218, 5235, 5276, 5312, 5319, 5326, 5328, 5330, 5341, 5351, 5374, 5416, 5450, 5454, 5467, 5565, 5582, 5611, 5615, 5628, 5635, 5672, 5680, 5716, 5728, 5753, 5814, 5850, 5954, 5982, 5992, 6035, 6112, 6125, 6137, 6263, 6299, 6396, 6428, 6438, 6455, 6538, 6561, 6568, 6617, 6625, 6646, 6659, 6729, 6742, 6763, 6777, 6793, 6803, 6822, 6874, 6880, 6931, 6981, 7018, 7035, 7037, 7064, 7072, 7078, 7139, 7151, 7152, 7172, 7181, 7207, 7218, 7277, 7305, 7314, 7345, 7348, 7360, 7367, 7375, 7379, 7382, 7400, 7403, 7482, 7484, 7489, 7490, 7491, 7509, 7597, 7603, 7654, 7667, 7668, 7669, 7670, 7674, 7679, 7691, 7755, 7816, 7842, 7844, 7856, 7871, 7936, 7945, 8005, 8040, 8051, 8068, 8105, 8130, 8131, 8165, 8170, 8171, 8184, 8206, 8243, 8268, 8320, 8337, 8374, 8424, 8517, 8557, 8608, 8612, 8738, 8762, 8849, 8958, 8985, 9002, 9045, 9059, 9064, 9065, 9068, 9076, 9091, 9097, 9105, 9107, 9114, 9115, 9122, 9123, 9181, 9199, 9242, 9249, 9268, 9290, 9302, 9347, 9351, 9354, 9391, 9414, 9416, 9424, 9428, 9449, 9469, 9470, 9477, 9520, 9553, 9558, 9590, 9598, 9627, 9642, 9645, 9659, 9675, 9692, 9720, 9721, 9724, 9771, 9780, 9806, 9807, 9829, 9965, 9994, 10032, 10042, 10045, 10047, 10074, 10101, 10122, 10240, 10275, 10289, 10298, 10300, 10303, 10316, 10333, 10351, 10353, 10381, 10394, 10398, 10399, 10424, 10439, 10450, 10463, 10484, 10490, 10499, 10504, 10514, 10516, 10537, 10546, 10552, 10567, 10576, 10618, 10619, 10625, 10630, 10652, 10663, 10705, 10718, 10719, 10749, 10756, 10770, 10776])

Analysis of TLR3 treated runs

Coverage


In [20]:
variable = 'Coverage'
sample = len(TD18_A [variable])*["TD18_A"]+len(TD9_D [variable])*["TD9_D"]+len(TD12_D [variable])*["TD12_D"]+len(TD18_D [variable])*["TD18_D"]+len(TD24_D [variable])*["TD24_D"]+len(TD9_E [variable])*["TD9_E"]+len(TD12_E [variable])*["TD12_E"]+len(TD18_E [variable])*["TD18_E"]+len(TD24_E [variable])*["TD24_E"]


overlay_table_concat = pd.DataFrame ({'Position':pd.concat([TD18_A['Position'],TD9_D['Position'],TD12_D['Position'],TD18_D['Position'],TD24_D['Position'],TD9_E['Position'],TD12_E['Position'],TD18_E['Position'],TD24_E['Position']]), variable:pd.concat([TD18_A[variable],TD9_D[variable],TD12_D[variable],TD18_D[variable],TD24_D[variable],TD9_E[variable],TD12_E[variable],TD18_E[variable],TD24_E[variable] ]), 'sample':sample})



sns.lmplot( x="Position", y=variable, data=overlay_table_concat, fit_reg=False, hue='sample', legend=False, size=7, aspect=2, lowess=True,scatter_kws={"s": 20})
plt.legend(loc='lower right')
plot_positions()
plot_genes()


Analysis of the frequency of the major variant


In [21]:
variable = 'Major_variant_frequency'
sample = len(TD18_A [variable])*["TD18_A"]+len(TD9_D [variable])*["TD9_D"]+len(TD12_D [variable])*["TD12_D"]+len(TD18_D [variable])*["TD18_D"]+len(TD24_D [variable])*["TD24_D"]+len(TD9_E [variable])*["TD9_E"]+len(TD12_E [variable])*["TD12_E"]+len(TD18_E [variable])*["TD18_E"]+len(TD24_E [variable])*["TD24_E"]


overlay_table_concat = pd.DataFrame ({'Position':pd.concat([TD18_A['Position'],TD9_D['Position'],TD12_D['Position'],TD18_D['Position'],TD24_D['Position'],TD9_E['Position'],TD12_E['Position'],TD18_E['Position'],TD24_E['Position']]), variable:pd.concat([TD18_A[variable],TD9_D[variable],TD12_D[variable],TD18_D[variable],TD24_D[variable],TD9_E[variable],TD12_E[variable],TD18_E[variable],TD24_E[variable] ]), 'sample':sample})



sns.lmplot( x="Position", y=variable, data=overlay_table_concat, fit_reg=False, hue='sample', legend=False, size=7, aspect=2, lowess=True,scatter_kws={"s": 20})
plt.legend(loc='lower right')
plot_positions()
plot_genes()


Variants that increase in frequency in TD*_D


In [22]:
tables_TD = [TD9_D, TD12_D, TD18_D, TD24_D]
increasing_TD = get_increasing_variants(tables_TD)
print("There are "+str(len(increasing_TD))+" positions that rise in frequency.")
print("Those are:")
print(increasing_TD.keys())


/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:16: RuntimeWarning: invalid value encountered in double_scalars
  app.launch_new_instance()
/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:20: RuntimeWarning: invalid value encountered in double_scalars
/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:22: RuntimeWarning: invalid value encountered in double_scalars
There are 1173 positions that rise in frequency.
Those are:
dict_keys([68, 96, 109, 228, 249, 269, 271, 283, 288, 290, 291, 294, 303, 308, 311, 313, 317, 324, 325, 326, 330, 335, 339, 347, 349, 352, 366, 376, 377, 386, 408, 419, 426, 427, 442, 453, 454, 463, 477, 489, 490, 505, 541, 542, 556, 578, 592, 598, 615, 683, 688, 722, 723, 727, 744, 771, 782, 790, 798, 825, 840, 859, 860, 929, 930, 943, 959, 974, 977, 983, 1001, 1014, 1015, 1016, 1018, 1021, 1022, 1023, 1026, 1027, 1028, 1030, 1031, 1032, 1033, 1034, 1038, 1039, 1041, 1042, 1043, 1045, 1047, 1048, 1049, 1050, 1052, 1053, 1055, 1056, 1057, 1060, 1061, 1063, 1064, 1065, 1067, 1068, 1069, 1070, 1071, 1075, 1076, 1080, 1082, 1083, 1085, 1086, 1087, 1088, 1089, 1091, 1093, 1095, 1096, 1097, 1098, 1100, 1101, 1102, 1104, 1105, 1107, 1109, 1111, 1113, 1114, 1116, 1119, 1121, 1122, 1152, 1158, 1165, 1174, 1176, 1180, 1190, 1194, 1202, 1205, 1206, 1207, 1208, 1210, 1212, 1213, 1216, 1217, 1218, 1221, 1224, 1226, 1227, 1228, 1229, 1230, 1233, 1234, 1237, 1238, 1243, 1244, 1294, 1305, 1306, 1319, 1346, 1368, 1394, 1408, 1464, 1472, 1490, 1497, 1512, 1520, 1528, 1573, 1578, 1652, 1670, 1678, 1696, 1716, 1724, 1726, 1844, 1883, 1888, 1906, 1925, 1930, 1936, 1995, 2007, 2027, 2038, 2069, 2078, 2102, 2106, 2122, 2186, 2189, 2193, 2227, 2234, 2257, 2287, 2291, 2292, 2301, 2317, 2318, 2347, 2399, 2406, 2409, 2413, 2417, 2419, 2422, 2423, 2425, 2428, 2429, 2431, 2434, 2435, 2438, 2476, 2477, 2480, 2488, 2497, 2507, 2517, 2520, 2523, 2536, 2540, 2542, 2543, 2546, 2547, 2549, 2550, 2552, 2555, 2556, 2557, 2559, 2561, 2563, 2564, 2565, 2568, 2569, 2585, 2586, 2588, 2589, 2598, 2599, 2604, 2608, 2609, 2611, 2612, 2613, 2616, 2617, 2619, 2621, 2622, 2623, 2625, 2627, 2628, 2630, 2631, 2632, 2634, 2641, 2644, 2674, 2678, 2680, 2681, 2693, 2703, 2711, 2714, 2715, 2742, 2743, 2761, 2820, 2848, 2870, 2876, 2877, 2901, 2906, 2912, 2921, 2923, 2924, 2925, 2927, 2928, 2932, 2936, 2937, 2938, 2939, 2965, 2966, 2967, 2968, 2970, 2971, 2972, 2974, 2975, 2976, 2977, 2978, 2979, 2980, 2981, 2982, 2983, 2985, 2986, 2987, 2989, 2990, 2991, 2992, 2994, 2995, 2996, 2997, 2998, 2999, 3000, 3002, 3003, 3004, 3006, 3009, 3010, 3012, 3013, 3015, 3016, 3017, 3018, 3019, 3020, 3021, 3022, 3024, 3026, 3027, 3029, 3030, 3031, 3033, 3034, 3036, 3038, 3039, 3046, 3054, 3078, 3082, 3088, 3089, 3093, 3100, 3101, 3102, 3105, 3115, 3116, 3117, 3122, 3131, 3135, 3136, 3137, 3140, 3142, 3144, 3147, 3148, 3149, 3150, 3151, 3153, 3154, 3159, 3160, 3161, 3171, 3175, 3180, 3184, 3189, 3190, 3193, 3194, 3196, 3197, 3199, 3200, 3201, 3203, 3204, 3205, 3206, 3212, 3221, 3237, 3243, 3265, 3283, 3287, 3290, 3303, 3328, 3330, 3331, 3337, 3338, 3339, 3343, 3344, 3345, 3347, 3348, 3351, 3352, 3354, 3355, 3358, 3359, 3361, 3364, 3365, 3366, 3368, 3369, 3370, 3374, 3375, 3376, 3378, 3385, 3386, 3387, 3388, 3389, 3390, 3392, 3403, 3437, 3443, 3446, 3452, 3463, 3468, 3469, 3473, 3539, 3558, 3559, 3605, 3621, 3632, 3638, 3653, 3666, 3674, 3678, 3695, 3719, 3725, 3731, 3744, 3750, 3795, 3802, 3850, 3869, 3990, 4019, 4045, 4048, 4052, 4073, 4085, 4107, 4112, 4124, 4139, 4151, 4154, 4163, 4208, 4213, 4243, 4247, 4279, 4302, 4336, 4377, 4385, 4391, 4420, 4429, 4458, 4461, 4465, 4468, 4473, 4485, 4514, 4521, 4532, 4550, 4551, 4555, 4556, 4558, 4560, 4561, 4562, 4573, 4583, 4584, 4586, 4591, 4596, 4598, 4613, 4615, 4616, 4618, 4619, 4621, 4623, 4624, 4625, 4631, 4632, 4633, 4634, 4635, 4638, 4639, 4640, 4643, 4647, 4650, 4652, 4662, 4680, 4682, 4687, 4691, 4696, 4701, 4708, 4710, 4714, 4723, 4738, 4739, 4752, 4756, 4789, 4794, 4795, 4809, 4811, 4814, 4815, 4824, 4848, 4862, 4877, 4881, 4902, 4909, 4921, 4935, 4937, 4951, 4955, 4966, 4968, 4975, 4976, 4981, 5072, 5084, 5136, 5139, 5257, 5267, 5276, 5292, 5310, 5377, 5380, 5388, 5390, 5414, 5420, 5422, 5424, 5429, 5433, 5435, 5444, 5447, 5452, 5460, 5468, 5471, 5482, 5484, 5485, 5486, 5488, 5491, 5493, 5494, 5496, 5497, 5498, 5499, 5500, 5502, 5503, 5504, 5505, 5506, 5507, 5508, 5509, 5510, 5512, 5515, 5518, 5521, 5523, 5526, 5531, 5532, 5533, 5534, 5535, 5538, 5539, 5540, 5541, 5542, 5546, 5547, 5550, 5556, 5564, 5565, 5569, 5570, 5571, 5575, 5576, 5577, 5578, 5581, 5583, 5584, 5587, 5589, 5591, 5593, 5594, 5596, 5601, 5605, 5608, 5687, 5688, 5689, 5692, 5693, 5694, 5695, 5696, 5697, 5698, 5699, 5700, 5701, 5703, 5704, 5705, 5706, 5707, 5708, 5709, 5710, 5711, 5714, 5715, 5716, 5717, 5718, 5722, 5724, 5725, 5726, 5727, 5728, 5729, 5730, 5731, 5732, 5734, 5735, 5739, 5740, 5762, 5766, 5791, 5804, 5806, 5811, 5815, 5821, 5823, 5827, 5834, 5836, 5869, 5887, 5904, 5978, 5981, 5996, 5999, 6009, 6030, 6057, 6101, 6110, 6165, 6224, 6229, 6283, 6308, 6323, 6326, 6337, 6369, 6384, 6392, 6400, 6408, 6415, 6423, 6425, 6432, 6433, 6434, 6437, 6439, 6440, 6446, 6447, 6448, 6449, 6473, 6486, 6497, 6521, 6528, 6538, 6543, 6554, 6567, 6575, 6584, 6589, 6606, 6633, 6644, 6650, 6664, 6686, 6769, 6778, 6782, 6791, 6792, 6804, 6807, 6813, 6869, 6889, 6937, 6957, 6973, 6984, 6985, 6988, 6989, 7052, 7064, 7078, 7157, 7166, 7202, 7224, 7227, 7240, 7254, 7268, 7278, 7283, 7367, 7384, 7390, 7415, 7455, 7467, 7493, 7497, 7506, 7514, 7516, 7520, 7547, 7579, 7583, 7607, 7659, 7688, 7699, 7705, 7706, 7713, 7722, 7729, 7730, 7731, 7734, 7740, 7741, 7745, 7746, 7748, 7749, 7750, 7752, 7755, 7756, 7757, 7760, 7762, 7768, 7771, 7773, 7776, 7778, 7779, 7780, 7781, 7792, 7793, 7814, 7828, 7840, 7846, 7860, 7892, 7913, 7925, 7928, 7930, 7931, 7932, 7934, 7935, 7937, 7939, 7979, 7981, 7986, 8015, 8067, 8069, 8094, 8155, 8211, 8220, 8253, 8259, 8290, 8294, 8305, 8338, 8349, 8359, 8433, 8438, 8453, 8503, 8510, 8513, 8532, 8590, 8595, 8610, 8611, 8620, 8632, 8643, 8652, 8656, 8659, 8663, 8684, 8690, 8716, 8724, 8726, 8740, 8770, 8773, 8775, 8776, 8778, 8787, 8788, 8789, 8792, 8794, 8795, 8801, 8804, 8811, 8835, 8848, 8854, 8867, 8877, 8881, 8882, 8885, 8886, 8888, 8889, 8894, 8895, 8908, 8912, 8915, 8917, 8920, 8922, 8924, 8926, 8927, 8930, 8936, 8938, 8939, 8942, 8945, 8946, 8948, 8950, 8952, 8955, 8966, 8970, 8971, 8983, 8984, 8986, 9001, 9004, 9017, 9019, 9021, 9023, 9026, 9029, 9069, 9074, 9079, 9087, 9098, 9101, 9132, 9147, 9219, 9229, 9240, 9247, 9267, 9272, 9274, 9293, 9314, 9325, 9328, 9333, 9337, 9349, 9353, 9369, 9371, 9393, 9399, 9411, 9434, 9455, 9459, 9461, 9462, 9464, 9465, 9466, 9469, 9470, 9473, 9477, 9492, 9497, 9501, 9513, 9516, 9532, 9559, 9561, 9587, 9596, 9602, 9632, 9644, 9656, 9658, 9660, 9661, 9664, 9665, 9666, 9676, 9681, 9684, 9692, 9693, 9696, 9699, 9702, 9705, 9706, 9707, 9710, 9711, 9717, 9720, 9722, 9725, 9727, 9728, 9729, 9730, 9733, 9734, 9740, 9741, 9745, 9746, 9751, 9752, 9755, 9756, 9757, 9759, 9762, 9765, 9766, 9777, 9779, 9786, 9821, 9855, 9861, 9862, 9870, 9873, 9884, 9886, 9894, 9895, 9896, 9907, 9911, 9923, 9925, 9927, 9933, 9934, 9938, 9944, 9951, 9958, 9960, 9964, 9969, 9991, 10046, 10048, 10085, 10110, 10146, 10153, 10185, 10191, 10252, 10270, 10293, 10306, 10314, 10341, 10347, 10412, 10424, 10425, 10426, 10427, 10471, 10474, 10476, 10489, 10491, 10512, 10530, 10559, 10665, 10695, 10715, 10716, 10774])
/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:18: RuntimeWarning: invalid value encountered in double_scalars

Variants that increase in frequency in TD*_E


In [23]:
tables_TE = [TD9_E, TD12_E, TD18_E, TD24_E]
increasing_TE = get_increasing_variants(tables_TE)
print("There are "+str(len(increasing_TE))+" positions that rise in frequency.")
print("Those are:")
print(increasing_TE.keys())


/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:16: RuntimeWarning: invalid value encountered in double_scalars
  app.launch_new_instance()
/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:20: RuntimeWarning: invalid value encountered in double_scalars
/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:22: RuntimeWarning: invalid value encountered in double_scalars
There are 446 positions that rise in frequency.
Those are:
dict_keys([33, 34, 39, 40, 50, 60, 63, 64, 76, 90, 96, 112, 113, 185, 188, 214, 239, 264, 269, 271, 309, 338, 340, 362, 366, 390, 401, 458, 503, 505, 518, 573, 655, 670, 689, 705, 714, 730, 733, 739, 747, 752, 773, 778, 779, 787, 810, 823, 824, 827, 848, 859, 881, 896, 927, 928, 947, 986, 1090, 1140, 1218, 1253, 1280, 1283, 1310, 1317, 1331, 1332, 1348, 1384, 1395, 1427, 1430, 1438, 1442, 1464, 1530, 1569, 1576, 1578, 1580, 1582, 1593, 1594, 1596, 1597, 1615, 1713, 1768, 1773, 1775, 1785, 1786, 1810, 1827, 1828, 1841, 1880, 1935, 1949, 1968, 1971, 1982, 1987, 2008, 2018, 2027, 2046, 2052, 2074, 2085, 2141, 2189, 2200, 2211, 2232, 2242, 2252, 2267, 2287, 2326, 2340, 2360, 2384, 2398, 2490, 2508, 2525, 2564, 2636, 2638, 2651, 2679, 2699, 2703, 2704, 2733, 2757, 2777, 2787, 2791, 2810, 2811, 2929, 3051, 3094, 3216, 3249, 3253, 3267, 3313, 3321, 3324, 3335, 3337, 3439, 3452, 3480, 3505, 3519, 3523, 3525, 3544, 3555, 3561, 3570, 3577, 3581, 3582, 3594, 3597, 3608, 3617, 3625, 3653, 3675, 3681, 3692, 3695, 3698, 3705, 3711, 3720, 3731, 3734, 3781, 3783, 3794, 3800, 3825, 3827, 3828, 3836, 3838, 3857, 3858, 3878, 3905, 3937, 3959, 3960, 3965, 3975, 3976, 3986, 4012, 4084, 4121, 4135, 4199, 4242, 4261, 4263, 4337, 4345, 4386, 4426, 4551, 4606, 4607, 4608, 4623, 4670, 4675, 4704, 4707, 4814, 4897, 4938, 4962, 4971, 4990, 5017, 5032, 5045, 5062, 5066, 5079, 5091, 5095, 5143, 5179, 5190, 5200, 5223, 5224, 5235, 5249, 5266, 5285, 5300, 5355, 5357, 5374, 5414, 5662, 5677, 5678, 5680, 5683, 5684, 5842, 5845, 5955, 6056, 6104, 6153, 6179, 6185, 6199, 6223, 6340, 6396, 6410, 6475, 6558, 6628, 6629, 6633, 6642, 6665, 6759, 6785, 6792, 6800, 6822, 6836, 6845, 6857, 6861, 6864, 6887, 6892, 6898, 6902, 6906, 6914, 6958, 6978, 7046, 7104, 7140, 7147, 7172, 7173, 7178, 7184, 7186, 7201, 7222, 7223, 7225, 7255, 7313, 7328, 7343, 7371, 7386, 7393, 7394, 7419, 7437, 7446, 7464, 7529, 7556, 7579, 7618, 7648, 7698, 7871, 7872, 7978, 8027, 8065, 8070, 8071, 8086, 8122, 8135, 8139, 8143, 8162, 8176, 8177, 8187, 8271, 8372, 8466, 8538, 8575, 8596, 8617, 8641, 8658, 8669, 8671, 8675, 8690, 8691, 8702, 8735, 8739, 8748, 8799, 8818, 8829, 8858, 8860, 8867, 8974, 8994, 8999, 9013, 9021, 9038, 9043, 9046, 9052, 9054, 9069, 9089, 9105, 9106, 9184, 9309, 9322, 9351, 9365, 9371, 9373, 9390, 9392, 9413, 9424, 9425, 9449, 9450, 9455, 9483, 9485, 9490, 9499, 9508, 9517, 9559, 9572, 9592, 9602, 9606, 9686, 9689, 9709, 9726, 9748, 9796, 9828, 9946, 9971, 9977, 10006, 10012, 10253, 10298, 10316, 10320, 10328, 10332, 10348, 10351, 10354, 10400, 10410, 10575, 10585, 10592, 10614, 10662, 10665, 10711, 10719, 10732, 10738, 10761, 10766, 10769])
/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:18: RuntimeWarning: invalid value encountered in double_scalars

Table with positions requested by Vincent


In [40]:
toVincent = pd.DataFrame()
allExps = [clone12,clone15,clone19,DD18_A,DD18_D,DD3_E,DD12_E,DD18_E,DD24_E,DD24crude_E,TD18_A,TD9_D,TD12_D,TD18_D,TD24_D,TD9_E,TD12_E,TD18_E,TD24_E]
print(len(allExps))

for d in allExps:
#    print(d.loc[2340])
#    print(d.loc[7172])
    temp_316 = d[d['Position']==316]    
    temp_2340 = d[d['Position']==2340]
    temp_7172 = d[d['Position']==7172]
    temp_9165 = d[d['Position']==9165]    
    toVincent=pd.concat([toVincent,temp_316])
    toVincent=pd.concat([toVincent,temp_2340])
    toVincent=pd.concat([toVincent,temp_7172])
    toVincent=pd.concat([toVincent,temp_9165])
    
expnames=["clone12","clone15","clone19","DD18_A","DD18_D","DD3_E","DD12_E","DD18_E","DD24_E","DD24crude_E","TD18_A","TD9_D","TD12_D","TD18_D","TD24_D","TD9_E","TD12_E","TD18_E","TD24_E"]
expnames4 = ["clone12"]*4,["clone15"]*4,["clone19"]*4,["DD18_A"]*4,["DD18_D"]*4,["DD3_E"]*4,["DD12_E"]*4,["DD18_E"]*4,["DD24_E"]*4,["DD24crude_E"]*4,["TD18_A"]*4,["TD9_D"]*4,["TD12_D"]*4,["TD18_D"]*4,["TD24_D"]*4,["TD9_E"]*4,["TD12_E"]*4,["TD18_E"]*4,["TD24_E"]*4,

expnames = [item for sublist in expnames4 for item in sublist]
print(len(expnames))
toVincent['expName']=expnames
toVincent.to_csv( "pos_316_2340_7172_9165_2ndLibrary.csv")
print(temp_7172)


19
76
      Unnamed: 0  Position  As     Cs  Gs    Ts  Ns  Coverage  \
7172        7172      7172   7  20295   9  2958   0     23269   

      As_quality_corrected  Cs_quality_corrected            ...              \
7172              6.965385          20290.646378            ...               

      Second_variant_frequency  Second_variant_frequency_quality_corrected  \
7172                  0.127122                                    0.127121   

      Mean_probability_of_sequencing_error  \
7172                              0.000169   

     Expected_number_of_sequencing_errors  Consensus_aa  Secondbase_aa  Phase  \
7172                             3.925456             H              Y    3.0   

       null   is_synonymous  1_major_variant_frequency  
7172  False  non-synonymous                     0.1278  

[1 rows x 29 columns]


IGNORE EVERYTHING BELOW THIS LINE



Frequency of the major base


In [13]:
sns.lmplot( x="Position", y="Major_variant_frequency_quality_corrected", data=cirseq, fit_reg=False, legend=False, size=7, aspect=2, lowess=True,scatter_kws={"s": 20})
plt.legend(loc='lower right')
plot_positions()
plot_genes()


Positions with frequency of the major variant below 0.95:


In [14]:
cirseq['Position'][cirseq['Major_variant_frequency_quality_corrected']<0.95]


Out[14]:
0            0
1            1
2            2
910        910
926        926
1785      1785
10776    10776
10779    10779
10801    10801
10802    10802
10803    10803
10804    10804
10805    10805
10806    10806
Name: Position, dtype: int64

There is already position 1785... What are its states?


In [15]:
print("Position 1785: ")
print ("Major variant: "+cirseq['Major_variant'][1785])
print ("Second variant: "+cirseq['Second_variant'][1785])
print ("Major variant, aa: "+cirseq['consensus_aa'][1785])
print ("Second variant, aa: "+cirseq['secondbase_aa'][1785])

print("Frequency of the major variant: "+str(cirseq['Major_variant_frequency_quality_corrected'][1785]))
print("Frequency of the second variant: "+str(cirseq['Second_variant_frequency_quality_corrected'][1785]))


Position 1785: 
Major variant: C
Second variant: T
Major variant, aa: A
Second variant, aa: V
Frequency of the major variant: 0.870546
Frequency of the second variant: 0.125279

Plot of the diversity in the Cirseq experiment


In [16]:
cirseq['1_major_variant_frequency'] = 1.0 - cirseq['Major_variant_frequency_quality_corrected']

#f, ax = plt.subplots(figsize=(10, 7))
#ax.set(yscale="log")

lm=sns.lmplot( x="Position", y="1_major_variant_frequency", data=cirseq, fit_reg=False, legend=False, size=7, aspect=2, scatter_kws={"s": 10})
axes = lm.axes
axes[0,0].set_ylim(0.001,0.05)
axes[0,0].set(yscale="log")

plt.legend(loc='lower right')


Here the distribution of diversity seems unimodal.

Comparison with and without reamplification


In [17]:
table_for_correlations = pd.DataFrame ({'51A_Cov':DD51_A ['Coverage'],'51ANR_Cov':DD51_A_no_reamp['Coverage'], '51A_MV':DD51_A ['Major_variant_frequency_quality_corrected'], '51ANR_MV':DD51_A_no_reamp ['Major_variant_frequency_quality_corrected'] })
table_for_correlations_highCov = table_for_correlations.loc[ (table_for_correlations['51A_Cov'] > 1000) & (table_for_correlations['51ANR_Cov'] > 1000) ]

sns.pairplot(table_for_correlations_highCov, vars=['51A_Cov','51ANR_Cov','51A_MV','51ANR_MV'], kind="scatter")


Out[17]:
<seaborn.axisgrid.PairGrid at 0x7fa651233208>

Focus on the correlation between major variant frequencies


In [18]:
table_for_correlations_highFreq = table_for_correlations.loc[ (table_for_correlations['51A_MV'] > 0.1) & (table_for_correlations['51ANR_MV'] > 0.1) ]


lm=sns.lmplot(x='51A_MV',y='51ANR_MV', data=table_for_correlations_highFreq)
axes = lm.axes
#axes[0,0].set_ylim(0.001,0.05)

#axes[0,0].set(yscale="log", xscale="log")
res=linregress(table_for_correlations_highFreq['51A_MV'], table_for_correlations_highFreq['51ANR_MV'])
print(res)


LinregressResult(slope=0.92080042757889113, intercept=0.078620806030560431, rvalue=0.55404367334663751, pvalue=0.0, stderr=0.013313968229028231)

There is a correlation between the frequencies of the major variants in the two libraries, but the correlation coefficient is not great (0.55).

Focus on those with frequency >0.9 in the sample with reamplification


In [19]:
table_for_correlations_highFreq = table_for_correlations.loc[ (table_for_correlations['51ANR_MV'] > 0.9)  ]

lm=sns.lmplot(x='51A_MV',y='51ANR_MV', data=table_for_correlations_highFreq)
axes = lm.axes
#axes[0,0].set_ylim(0.001,0.05)

#axes[0,0].set(yscale="log")
res=linregress(table_for_correlations_highFreq['51A_MV'], table_for_correlations_highFreq['51ANR_MV'])
print(res)


LinregressResult(slope=0.77600804175089222, intercept=0.22278502849188975, rvalue=0.48967318178798563, pvalue=0.0, stderr=0.013302675626933793)

The correlation is not great for positions where there is little variation. It is also surprising to see that lots of positions with frequencies between 0.9 and 1.0 in the non-reamplified library have a frequency of nearly 1.0 in the reamplified one. This seems to suggest that the amplification by PCR overly amplified the dominant variant in lots of cases.

Correlation between second most frequent variants


In [20]:
table_for_correlations = pd.DataFrame ({'51A_Cov':DD51_A ['Coverage'],'51ANR_Cov':DD51_A_no_reamp['Coverage'], '51A_SV':DD51_A ['Second_variant_frequency_quality_corrected'], '51ANR_SV':DD51_A_no_reamp ['Second_variant_frequency_quality_corrected'] })
table_for_correlations_highCov = table_for_correlations.loc[ (table_for_correlations['51A_Cov'] > 1000) & (table_for_correlations['51ANR_Cov'] > 1000) ]

In [21]:
#table_for_correlations_highFreq = table_for_correlations.loc[ (table_for_correlations['51A_MV'] > 0.95)  ]

lm=sns.lmplot(x='51A_SV',y='51ANR_SV', data=table_for_correlations_highCov)
axes = lm.axes
#axes[0,0].set_ylim(0.001,0.05)

#axes[0,0].set(yscale="log")
res=linregress(table_for_correlations_highCov['51A_SV'], table_for_correlations_highCov['51ANR_SV'])
print(res)


LinregressResult(slope=1.0931256196492429, intercept=-0.00037973675091814828, rvalue=0.6325437149737615, pvalue=0.0, stderr=0.012948740114485434)

The correlation between the frequencies of the second most frequent variants is not too bad (correlation coefficient 0.63).

Positions that differ in their most frequent base, and in their second most frequent base

Positions that differ in the most frequent base:


In [22]:
DD51_A ['Position'][DD51_A ['Major_variant'] != DD51_A_no_reamp ['Major_variant']]


Out[22]:
3          3
4          4
5          5
2340    2340
Name: Position, dtype: int64

Number of positions that differ in the second most frequent base:


In [23]:
len(DD51_A ['Position'][DD51_A ['Second_variant'] != DD51_A_no_reamp ['Second_variant']])


Out[23]:
1114

Frequencies of the variants that change between the two library preparations


In [24]:
sns.kdeplot(DD51_A ['Second_variant_frequency_quality_corrected'][DD51_A ['Second_variant'] != DD51_A_no_reamp ['Second_variant']])


Out[24]:
<matplotlib.axes._subplots.AxesSubplot at 0x7fa632c0f048>

In [25]:
(DD51_A ['Second_variant_frequency_quality_corrected'][DD51_A ['Second_variant'] != DD51_A_no_reamp ['Second_variant']]).describe()


Out[25]:
count    1114.000000
mean        0.002228
std         0.002332
min         0.000239
25%         0.001260
50%         0.001690
75%         0.002630
max         0.046251
Name: Second_variant_frequency_quality_corrected, dtype: float64

The second most frequent variants that differ between with and without reamplification usually have a low frequency; for frequent variants, the correlation is not too bad.

Analysis of the diversity at time point 24 with or without reamplification


In [26]:
DD51_A_no_reamp['1_major_variant_frequency'] = 1.0 - DD51_A_no_reamp['Major_variant_frequency_quality_corrected']

#f, ax = plt.subplots(figsize=(10, 7))
#ax.set(yscale="log")

lm=sns.lmplot( x="Position", y="1_major_variant_frequency", data=DD51_A_no_reamp, fit_reg=False, legend=False, size=7, aspect=2, scatter_kws={"s": 10})
axes = lm.axes
axes[0,0].set_ylim(0.001,0.05)
axes[0,0].set(yscale="log")

plt.legend(loc='lower right')



In [27]:
DD51_A['1_major_variant_frequency'] = 1.0 - DD51_A['Major_variant_frequency_quality_corrected']

#f, ax = plt.subplots(figsize=(10, 7))
#ax.set(yscale="log")

lm=sns.lmplot( x="Position", y="1_major_variant_frequency", data=DD51_A, fit_reg=False, legend=False, size=7, aspect=2, scatter_kws={"s": 10})
axes = lm.axes
axes[0,0].set_ylim(0.001,0.05)
axes[0,0].set(yscale="log")

plt.legend(loc='lower right')



In [28]:
f, ax = plt.subplots(figsize=(15, 7))


lm=sns.kdeplot(np.log(DD51_A["1_major_variant_frequency"]), label="DD51_A")

axes = lm.axes
axes.set_xlim(-8,-4)

lm=sns.kdeplot(np.log(DD51_A_no_reamp["1_major_variant_frequency"]), label="DD51_A_no_reamp")


/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:4: RuntimeWarning: divide by zero encountered in log
  after removing the cwd from sys.path.
/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:9: RuntimeWarning: divide by zero encountered in log
  if __name__ == '__main__':

Positions that increase in frequency

Control, replicate A


In [29]:
tables_A = [DD3_A, DD6_A, DD9_A, DD12_A, DD24_A, DD51_A]
increasing_A = get_increasing_variants(tables_A)
print("There are "+str(len(increasing_A))+" positions that rise in frequency.")
print("Those are:")
print(increasing_A.keys())


There are 94 positions that rise in frequency.
Those are:
dict_keys([53, 55, 138, 165, 173, 316, 332, 357, 491, 824, 901, 1124, 1471, 1500, 1552, 1670, 1785, 1883, 1951, 2039, 2101, 2102, 2235, 2340, 2541, 2702, 2783, 2792, 2804, 2811, 3087, 3118, 3122, 3129, 3563, 3604, 4001, 4077, 4099, 4307, 4417, 4421, 4783, 4837, 4900, 4904, 5010, 5170, 5293, 5321, 5536, 5537, 5542, 5553, 5696, 5782, 5817, 5880, 5901, 6397, 6424, 6459, 6525, 6724, 6735, 6751, 6804, 6866, 6891, 6993, 7251, 7305, 7338, 8219, 8449, 8466, 8826, 8850, 8952, 9079, 9142, 9251, 9297, 9607, 9978, 10025, 10153, 10360, 10405, 10433, 10457, 10618, 10630, 10633])
/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:22: RuntimeWarning: invalid value encountered in double_scalars
/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:18: RuntimeWarning: invalid value encountered in double_scalars

Control, replicate D


In [30]:
tables_D = [DD3_D, DD6_D, DD9_D, DD12_D, DD24_D]
increasing_D = get_increasing_variants(tables_D)
print("There are "+str(len(increasing_D))+" positions that rise in frequency.")
print("Those are:")
print(increasing_D.keys())


/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:22: RuntimeWarning: invalid value encountered in double_scalars
There are 321 positions that rise in frequency.
Those are:
dict_keys([40, 138, 164, 176, 188, 207, 243, 254, 263, 295, 298, 433, 466, 531, 533, 535, 548, 570, 599, 664, 703, 731, 740, 796, 860, 957, 997, 1001, 1110, 1118, 1179, 1284, 1305, 1346, 1355, 1373, 1390, 1434, 1501, 1545, 1556, 1623, 1635, 1748, 1784, 1809, 1916, 1937, 1968, 1973, 2042, 2044, 2048, 2069, 2086, 2094, 2105, 2107, 2121, 2185, 2187, 2200, 2233, 2244, 2249, 2280, 2287, 2288, 2361, 2375, 2402, 2406, 2426, 2427, 2433, 2435, 2446, 2450, 2474, 2510, 2511, 2518, 2521, 2526, 2534, 2572, 2607, 2698, 2707, 2735, 2754, 2757, 2804, 2816, 2820, 2824, 2828, 2833, 2834, 2843, 2919, 2925, 2930, 2973, 2989, 2991, 3014, 3039, 3080, 3123, 3242, 3274, 3381, 3391, 3397, 3523, 3541, 3546, 3595, 3628, 3722, 3729, 3738, 3743, 3754, 3837, 3872, 3883, 3926, 3942, 3968, 3976, 4018, 4062, 4116, 4160, 4164, 4168, 4179, 4181, 4190, 4197, 4201, 4209, 4221, 4273, 4355, 4475, 4483, 4536, 4537, 4576, 4606, 4634, 4706, 4799, 4818, 4879, 4881, 4913, 4981, 5119, 5125, 5126, 5159, 5198, 5269, 5278, 5300, 5388, 5466, 5595, 5598, 5600, 5612, 5636, 5638, 5640, 5660, 5693, 5697, 5763, 5776, 5812, 5921, 5969, 5971, 5992, 6020, 6101, 6105, 6110, 6174, 6278, 6282, 6286, 6302, 6346, 6418, 6419, 6462, 6473, 6522, 6537, 6557, 6558, 6568, 6582, 6628, 6634, 6658, 6690, 6692, 6711, 6723, 6725, 6729, 6774, 6807, 6883, 6887, 6893, 6936, 6944, 6953, 6971, 7105, 7159, 7172, 7207, 7285, 7286, 7294, 7308, 7325, 7377, 7390, 7420, 7666, 7740, 7760, 7822, 7867, 7901, 7956, 7978, 8098, 8119, 8125, 8136, 8153, 8157, 8174, 8214, 8298, 8376, 8417, 8455, 8472, 8477, 8480, 8626, 8642, 8680, 8702, 8724, 8727, 8742, 8779, 8851, 8867, 8874, 8875, 8896, 8912, 9081, 9149, 9153, 9186, 9209, 9262, 9395, 9415, 9429, 9439, 9468, 9516, 9529, 9539, 9585, 9589, 9592, 9620, 9654, 9666, 9671, 9679, 9771, 9807, 9838, 9943, 10004, 10019, 10028, 10077, 10153, 10167, 10341, 10361, 10444, 10446, 10466, 10531, 10538, 10558, 10572, 10606, 10642, 10655, 10711, 10724])

Control, replicate E


In [31]:
tables_E = [DD6_E, DD9_E]
increasing_E = get_increasing_variants(tables_E)
print("There are "+str(len(increasing_E))+" positions that rise in frequency.")
print("There are too many of them, we choose not to print them.")


/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:22: RuntimeWarning: invalid value encountered in double_scalars
There are 6384 positions that rise in frequency.
There are too many of them, we choose not to print them.

TLR3 treatment


In [32]:
tables_TA = [TD9_A, TD12_A, TD24_A, TD51_A]
increasing_TA = get_increasing_variants(tables_TA)
print("There are "+str(len(increasing_TA))+" positions that rise in frequency.")
print("Those are:")
print(increasing_TA.keys())


/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:20: RuntimeWarning: invalid value encountered in double_scalars
/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:22: RuntimeWarning: invalid value encountered in double_scalars
There are 241 positions that rise in frequency.
Those are:
dict_keys([84, 137, 146, 158, 168, 226, 233, 269, 285, 305, 415, 461, 486, 487, 520, 580, 628, 629, 647, 649, 719, 752, 832, 856, 877, 916, 917, 919, 941, 958, 974, 1003, 1028, 1085, 1088, 1121, 1137, 1149, 1181, 1205, 1319, 1321, 1328, 1339, 1344, 1392, 1403, 1445, 1524, 1525, 1546, 1618, 1620, 1622, 1633, 1745, 1747, 1750, 1756, 1785, 1874, 1888, 2117, 2193, 2243, 2265, 2309, 2356, 2373, 2409, 2425, 2428, 2435, 2442, 2472, 2492, 2495, 2503, 2598, 2609, 2720, 2733, 2864, 2873, 2950, 3007, 3145, 3258, 3300, 3417, 3482, 3547, 3553, 3608, 3641, 3644, 3658, 3696, 3709, 3746, 3797, 3805, 3831, 3837, 3890, 3904, 4084, 4288, 4302, 4347, 4360, 4363, 4474, 4502, 4564, 4606, 4797, 4824, 4836, 4842, 4849, 4897, 4900, 4925, 4974, 5032, 5056, 5108, 5186, 5252, 5278, 5307, 5332, 5449, 5494, 5675, 5691, 5747, 5821, 5848, 5887, 5934, 5935, 6032, 6037, 6087, 6317, 6505, 6551, 6553, 6562, 6588, 6593, 6682, 6691, 6701, 6708, 6737, 6760, 6793, 6812, 6813, 6880, 6882, 6934, 6961, 6962, 7057, 7132, 7177, 7184, 7234, 7345, 7396, 7438, 7474, 7533, 7557, 7607, 7725, 7745, 7781, 7872, 7891, 7976, 8160, 8218, 8243, 8279, 8280, 8311, 8320, 8325, 8353, 8354, 8460, 8519, 8529, 8663, 8673, 8702, 8851, 8886, 8887, 9033, 9087, 9135, 9152, 9175, 9193, 9209, 9226, 9302, 9316, 9375, 9553, 9587, 9602, 9620, 9622, 9625, 9633, 9654, 9727, 9740, 9930, 9964, 9990, 10159, 10177, 10292, 10360, 10389, 10394, 10445, 10455, 10525, 10534, 10629, 10644, 10757])

Plotting only positions that consistently rise in frequency, control, replicate A

Here we plot the frequency of the major variant at day 51 in the control experiment, A. We color the dots according to whether they were consistently rising in frequency or not.


In [33]:
sns.set_palette("hls")
sns.set_context("poster")

In [34]:
increasing_A_keys = increasing_A.keys()
is_increasing = []
for i in DD51_A['Position']:
    if i in increasing_A_keys:
        is_increasing.append("Increasing")
    else:
        is_increasing.append("Not increasing")
to_plot = pd.DataFrame ({'Position':DD51_A['Position'], 'Major_variant_frequency_quality_corrected':DD51_A ['Major_variant_frequency_quality_corrected'],'Is_increasing':is_increasing}) 
sns.lmplot( x="Position", y="Major_variant_frequency_quality_corrected", data=to_plot, fit_reg=False, hue='Is_increasing', legend=False, size=7, aspect=2)
#plt.legend(loc='lower right')
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plot_positions()
#plot_genes()


Increase per consistently increasing position

Here we plot the increase in frequency of the second variant, over the course of the entire experiment, in the different replicates.


In [35]:
def plot_increase_per_increasing_position (increasing_list, last_table):
    increasing_list_keys = increasing_list.keys()
    increase = []
    is_increasing = []
    for i in last_table['Position']:
        if i in increasing_list_keys:
            increase.append(increasing_list[i][1][len(increasing_list[i][1])-1] - increasing_list[i][1][0])
            is_increasing.append("Increasing")
        else:
            increase.append(0.0)
            is_increasing.append("Not increasing")
    to_plot = pd.DataFrame ({'Position':last_table['Position'], 'Increase':increase,'Is_increasing':is_increasing}) 
    sns.lmplot( x="Position", y="Increase", data=to_plot, fit_reg=False, hue='Is_increasing', legend=False, size=7, aspect=2)
#    plt.legend(loc='lower right')
    plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
    plot_positions()
    #plot_genes()
    return

Control, replicate A


In [36]:
plot_increase_per_increasing_position (increasing_A, DD51_A)


Control, replicate D:


In [37]:
plot_increase_per_increasing_position (increasing_D, DD24_D)


Control, replicate E (not to be trusted: only two time points):


In [38]:
plot_increase_per_increasing_position (increasing_E, DD9_E)


TLR3 treatment:


In [39]:
plot_increase_per_increasing_position (increasing_TA, TD51_A)


Plots overlaying different time points

Here we plot the frequencies of the major variants, for the different time points of an experiment.


In [40]:
overlay_table =  pd.DataFrame ({'Position':DD3_A['Position'], 'DD3_A':DD3_A ['Major_variant_frequency_quality_corrected'], 'DD6_A':DD6_A ['Major_variant_frequency_quality_corrected'],'DD9_A':DD9_A ['Major_variant_frequency_quality_corrected'],'DD12_A':DD12_A ['Major_variant_frequency_quality_corrected'], 'DD24_A':DD24_A ['Major_variant_frequency_quality_corrected'], 'DD51_A':DD51_A ['Major_variant_frequency_quality_corrected']})
siz = len(DD3_A ['Position'])
sample = siz*["DD3A"]+siz*["DD6A"]+siz*["DD9A"]+siz*["DD12A"]+siz*["DD24A"]+siz*["DD51A"]

overlay_table_concat = pd.DataFrame ({'Position':pd.concat([DD3_A['Position'],DD6_A['Position'],DD9_A['Position'],DD12_A['Position'],DD24_A['Position'],DD51_A['Position']]), 'Major_variant_frequency_quality_corrected':pd.concat([DD3_A ['Major_variant_frequency_quality_corrected'],DD6_A ['Major_variant_frequency_quality_corrected'], DD9_A ['Major_variant_frequency_quality_corrected'],DD12_A ['Major_variant_frequency_quality_corrected'], DD24_A ['Major_variant_frequency_quality_corrected'], DD51_A ['Major_variant_frequency_quality_corrected'] ]), 'sample':sample})

In [41]:
sns.lmplot( x="Position", y="Major_variant_frequency_quality_corrected", data=overlay_table_concat, fit_reg=False, hue='sample', legend=False, size=7, aspect=2)
#plt.legend(loc='lower right')
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.ylabel("Frequency of major variant")
plot_positions()
#plot_genes()


Same thing, smaller dots


In [42]:
sns.lmplot( x="Position", y="Major_variant_frequency_quality_corrected", data=overlay_table_concat, fit_reg=False, hue='sample', legend=False, size=7, aspect=2, scatter_kws={"s": 10})
#plt.legend(loc='lower right')
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.ylabel("Frequency of major variant")

plot_positions()
#plot_genes()


Same thing, without sample at 51 days


In [43]:
overlay_table_concat_no51 = overlay_table_concat.loc[overlay_table_concat['sample']!= "DD51A"]
sns.lmplot( x="Position", y="Major_variant_frequency_quality_corrected", data= overlay_table_concat_no51, fit_reg=False, hue='sample', legend=False, size=7, aspect=2)
#plt.legend(loc='lower right')
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.ylabel("Frequency of major variant")

plot_positions()
#plot_genes()


Difference in frequency between day 3 and day 51?

Here we want to know if over all positions, the diversity has increased between day 3 and day 51.


In [44]:
overlay_table_concat['1_major_variant_frequency'] = 1.0 - overlay_table_concat['Major_variant_frequency_quality_corrected']

#f, ax = plt.subplots(figsize=(10, 7))
#ax.set(yscale="log")

overlay_table_concat_nointer = overlay_table_concat.loc[ (overlay_table_concat['sample']!= "DD24A") & (overlay_table_concat['sample']!= "DD12A") & (overlay_table_concat['sample']!= "DD9A") & (overlay_table_concat['sample']!= "DD6A")]
lm=sns.lmplot( x="Position", y="1_major_variant_frequency", data=overlay_table_concat_nointer, fit_reg=False, hue='sample', legend=False, size=7, aspect=2, scatter_kws={"s": 10})
axes = lm.axes
axes[0,0].set_ylim(0.001,0.05)
axes[0,0].set(yscale="log")

#plt.legend(loc='lower right')
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plot_positions()
#plot_genes()


Day 3 only


In [45]:
overlay_table_concat_nointer = overlay_table_concat.loc[ (overlay_table_concat['sample']!= "DD51A") &(overlay_table_concat['sample']!= "DD24A") & (overlay_table_concat['sample']!= "DD12A") & (overlay_table_concat['sample']!= "DD9A") & (overlay_table_concat['sample']!= "DD6A")]
lm=sns.lmplot( x="Position", y="1_major_variant_frequency", data=overlay_table_concat_nointer, fit_reg=False, hue='sample', legend=False, size=7, aspect=2, scatter_kws={"s": 10})
axes = lm.axes
axes[0,0].set_ylim(0.001,0.05)
axes[0,0].set(yscale="log")

#plt.legend(loc='lower right')
#plot_positions()
#plot_genes()


Out[45]:
[None]

Boxplot day 3-day 51


In [46]:
overlay_table_concat_nointer = overlay_table_concat.loc[ (overlay_table_concat['sample']!= "DD24A") & (overlay_table_concat['sample']!= "DD12A") & (overlay_table_concat['sample']!= "DD9A") & (overlay_table_concat['sample']!= "DD6A")]
f, ax = plt.subplots(figsize=(10, 7))
ax.set(yscale="log")
lm=sns.violinplot(x='sample', y="1_major_variant_frequency", data=overlay_table_concat_nointer,  hue='sample', ax=ax, bw=0.2)
axes = lm.axes
axes.set_ylim(0.001,0.05)
axes.set(yscale="log")

medians = overlay_table_concat_nointer.groupby(['sample'])['1_major_variant_frequency'].median().values
plt.axhline(y=medians[0], linewidth=1, linestyle=':')
plt.axhline(y=medians[1], linewidth=1, linestyle=':')
print(medians)
syn = overlay_table_concat_nointer['1_major_variant_frequency'][overlay_table_concat_nointer['sample']=="DD3A"]
nonsyn = overlay_table_concat_nointer['1_major_variant_frequency'][overlay_table_concat_nointer['sample']=="DD51A"]
print("T-test p-value: "+str(ttest_ind(syn, nonsyn)[1]))
print((syn).median())
print((nonsyn).median())
# giving title to the plot
plt.title("Diversity through time");


[ 0.004337  0.004941]
T-test p-value: 0.00235244161087
0.004337000000000035
0.004940999999999973

Density (KDE) plot, control, replicate A


In [47]:
f, ax = plt.subplots(figsize=(15, 7))

temp = overlay_table_concat.loc[ (overlay_table_concat['sample']!= "DD51A") & (overlay_table_concat['sample']!= "DD24A") & (overlay_table_concat['sample']!= "DD12A") & (overlay_table_concat['sample']!= "DD9A") & (overlay_table_concat['sample']!= "DD6A")]
del temp['sample']
del temp['Major_variant_frequency_quality_corrected']
del temp['Position']
#print(temp.describe())
#ax = sns.kdeplot("1_major_variant_frequency", data=temp)
lm=sns.kdeplot(np.log(temp["1_major_variant_frequency"]), label="DD3A")
lm.set_ylabel('Density')
lm.set_xlabel('Logarithm of the diversity per position')

axes = lm.axes
axes.set_xlim(-8,-4)

temp = overlay_table_concat.loc[ (overlay_table_concat['sample']!= "DD51A") & (overlay_table_concat['sample']!= "DD24A") & (overlay_table_concat['sample']!= "DD12A") & (overlay_table_concat['sample']!= "DD9A") & (overlay_table_concat['sample']!= "DD3A")]
del temp['sample']
del temp['Major_variant_frequency_quality_corrected']
del temp['Position']
#print(temp.describe())
#ax = sns.kdeplot("1_major_variant_frequency", data=temp)
lm=sns.kdeplot(np.log(temp["1_major_variant_frequency"]), label="DD6A")

temp = overlay_table_concat.loc[ (overlay_table_concat['sample']!= "DD51A") & (overlay_table_concat['sample']!= "DD24A") & (overlay_table_concat['sample']!= "DD12A") & (overlay_table_concat['sample']!= "DD6A") & (overlay_table_concat['sample']!= "DD3A")]
del temp['sample']
del temp['Major_variant_frequency_quality_corrected']
del temp['Position']
#print(temp.describe())
#ax = sns.kdeplot("1_major_variant_frequency", data=temp)
lm=sns.kdeplot(np.log(temp["1_major_variant_frequency"]), label="DD9A")

temp = overlay_table_concat.loc[ (overlay_table_concat['sample']!= "DD51A") & (overlay_table_concat['sample']!= "DD24A") & (overlay_table_concat['sample']!= "DD9A") & (overlay_table_concat['sample']!= "DD6A") & (overlay_table_concat['sample']!= "DD3A")]
del temp['sample']
del temp['Major_variant_frequency_quality_corrected']
del temp['Position']
#print(temp.describe())
#ax = sns.kdeplot("1_major_variant_frequency", data=temp)
lm=sns.kdeplot(np.log(temp["1_major_variant_frequency"]), label="DD12A")

temp = overlay_table_concat.loc[ (overlay_table_concat['sample']!= "DD51A") & (overlay_table_concat['sample']!= "DD12A") & (overlay_table_concat['sample']!= "DD9A") & (overlay_table_concat['sample']!= "DD6A") & (overlay_table_concat['sample']!= "DD3A")]
del temp['sample']
del temp['Major_variant_frequency_quality_corrected']
del temp['Position']
#print(temp.describe())
#ax = sns.kdeplot("1_major_variant_frequency", data=temp)
lm=sns.kdeplot(np.log(temp["1_major_variant_frequency"]), label="DD24A")

temp = overlay_table_concat.loc[ (overlay_table_concat['sample']!= "DD24A") & (overlay_table_concat['sample']!= "DD12A") & (overlay_table_concat['sample']!= "DD9A") & (overlay_table_concat['sample']!= "DD6A") & (overlay_table_concat['sample']!= "DD3A")]
del temp['sample']
del temp['Major_variant_frequency_quality_corrected']
del temp['Position']
#print(temp.describe())
#ax = sns.kdeplot("1_major_variant_frequency", data=temp)
lm=sns.kdeplot(np.log(temp["1_major_variant_frequency"]), label="DD51A")


/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:9: RuntimeWarning: divide by zero encountered in log
  if __name__ == '__main__':
/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:22: RuntimeWarning: divide by zero encountered in log
/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:30: RuntimeWarning: divide by zero encountered in log
/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:38: RuntimeWarning: divide by zero encountered in log
/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:46: RuntimeWarning: divide by zero encountered in log
/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:54: RuntimeWarning: divide by zero encountered in log

Density (KDE) plot, control, replicate D


In [48]:
overlay_table =  pd.DataFrame ({'Position':DD3_D['Position'], 'DD3_D':DD3_D ['Major_variant_frequency_quality_corrected'], 'DD6_D':DD6_D ['Major_variant_frequency_quality_corrected'],'DD9_D':DD9_D ['Major_variant_frequency_quality_corrected'],'DD12_D':DD12_D ['Major_variant_frequency_quality_corrected'], 'DD24_D':DD24_D ['Major_variant_frequency_quality_corrected']})
siz = len(DD3_D ['Position'])
sample = siz*["DD3D"]+siz*["DD6D"]+siz*["DD9D"]+siz*["DD12D"]+siz*["DD24D"]

overlay_table_concat = pd.DataFrame ({'Position':pd.concat([DD3_D['Position'],DD6_D['Position'],DD9_D['Position'],DD12_D['Position'],DD24_D['Position']]), 'Major_variant_frequency_quality_corrected':pd.concat([DD3_D ['Major_variant_frequency_quality_corrected'],DD6_D ['Major_variant_frequency_quality_corrected'], DD9_D ['Major_variant_frequency_quality_corrected'],DD12_D ['Major_variant_frequency_quality_corrected'], DD24_D ['Major_variant_frequency_quality_corrected'] ]), 'sample':sample})
overlay_table_concat["1_major_variant_frequency"] = 1-overlay_table_concat['Major_variant_frequency_quality_corrected']

In [49]:
f, ax = plt.subplots(figsize=(15, 7))

temp = overlay_table_concat.loc[ (overlay_table_concat['sample']!= "DD24D") & (overlay_table_concat['sample']!= "DD12D") & (overlay_table_concat['sample']!= "DD9D") & (overlay_table_concat['sample']!= "DD6D")]
del temp['sample']
del temp['Major_variant_frequency_quality_corrected']
del temp['Position']
#print(temp.describe())
#ax = sns.kdeplot("1_major_variant_frequency", data=temp)
lm=sns.kdeplot(np.log(temp["1_major_variant_frequency"]), label="DD3D")
lm.set_ylabel('Density')
lm.set_xlabel('Logarithm of the diversity per position')

axes = lm.axes
axes.set_xlim(-8,-4)

temp = overlay_table_concat.loc[ (overlay_table_concat['sample']!= "DD24D") & (overlay_table_concat['sample']!= "DD12D") & (overlay_table_concat['sample']!= "DD9D") & (overlay_table_concat['sample']!= "DD3D")]
del temp['sample']
del temp['Major_variant_frequency_quality_corrected']
del temp['Position']
#print(temp.describe())
#ax = sns.kdeplot("1_major_variant_frequency", data=temp)
lm=sns.kdeplot(np.log(temp["1_major_variant_frequency"]), label="DD6D")

temp = overlay_table_concat.loc[ (overlay_table_concat['sample']!= "DD24D") & (overlay_table_concat['sample']!= "DD12D") & (overlay_table_concat['sample']!= "DD6D") & (overlay_table_concat['sample']!= "DD3D")]
del temp['sample']
del temp['Major_variant_frequency_quality_corrected']
del temp['Position']
#print(temp.describe())
#ax = sns.kdeplot("1_major_variant_frequency", data=temp)
lm=sns.kdeplot(np.log(temp["1_major_variant_frequency"]), label="DD9D")

temp = overlay_table_concat.loc[ (overlay_table_concat['sample']!= "DD24D") & (overlay_table_concat['sample']!= "DD9D") & (overlay_table_concat['sample']!= "DD6D") & (overlay_table_concat['sample']!= "DD3D")]
del temp['sample']
del temp['Major_variant_frequency_quality_corrected']
del temp['Position']
#print(temp.describe())
#ax = sns.kdeplot("1_major_variant_frequency", data=temp)
lm=sns.kdeplot(np.log(temp["1_major_variant_frequency"]), label="DD12D")

temp = overlay_table_concat.loc[ (overlay_table_concat['sample']!= "DD12D") & (overlay_table_concat['sample']!= "DD9D") & (overlay_table_concat['sample']!= "DD6D") & (overlay_table_concat['sample']!= "DD3D")]
del temp['sample']
del temp['Major_variant_frequency_quality_corrected']
del temp['Position']
#print(temp.describe())
#ax = sns.kdeplot("1_major_variant_frequency", data=temp)
lm=sns.kdeplot(np.log(temp["1_major_variant_frequency"]), label="DD24D")


/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:9: RuntimeWarning: divide by zero encountered in log
  if __name__ == '__main__':
/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:22: RuntimeWarning: divide by zero encountered in log
/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:30: RuntimeWarning: divide by zero encountered in log
/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:38: RuntimeWarning: divide by zero encountered in log
/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:46: RuntimeWarning: divide by zero encountered in log

Density (KDE) plot, TLR3 treatment


In [50]:
overlay_table =  pd.DataFrame ({'Position':TD9_A['Position'], 'TD9':TD9_A ['Major_variant_frequency_quality_corrected'], 'TD12':TD12_A ['Major_variant_frequency_quality_corrected'],'TD24':TD24_A ['Major_variant_frequency_quality_corrected'],'TD51':TD51_A ['Major_variant_frequency_quality_corrected'] })
siz = len(TD9_A ['Position'])
sample = siz*["TD9"]+siz*["TD12"]+siz*["TD24"]+siz*["TD51"]

overlay_table_concat = pd.DataFrame ({'Position':pd.concat([TD9_A['Position'], TD12_A['Position'], TD24_A['Position'], TD51_A['Position']]), 'Major_variant_frequency_quality_corrected':pd.concat([ TD9_A ['Major_variant_frequency_quality_corrected'], TD12_A ['Major_variant_frequency_quality_corrected'], TD24_A ['Major_variant_frequency_quality_corrected'], TD51_A ['Major_variant_frequency_quality_corrected'] ]), 'sample':sample})
overlay_table_concat["1_major_variant_frequency"] = 1-overlay_table_concat['Major_variant_frequency_quality_corrected']

In [51]:
f, ax = plt.subplots(figsize=(15, 7))

temp = overlay_table_concat.loc[ (overlay_table_concat['sample']!= "TD12") & (overlay_table_concat['sample']!= "TD24") & (overlay_table_concat['sample']!= "TD51") ]
del temp['sample']
del temp['Major_variant_frequency_quality_corrected']
del temp['Position']
#print(temp.describe())
#ax = sns.kdeplot("1_major_variant_frequency", data=temp)
lm=sns.kdeplot(np.log(temp["1_major_variant_frequency"]), label="TD9")
lm.set_ylabel('Density')
lm.set_xlabel('Logarithm of the diversity per position')

axes = lm.axes
axes.set_xlim(-8,-4)

temp = overlay_table_concat.loc[ (overlay_table_concat['sample']!= "TD9") & (overlay_table_concat['sample']!= "TD24") & (overlay_table_concat['sample']!= "TD51") ]
del temp['sample']
del temp['Major_variant_frequency_quality_corrected']
del temp['Position']
#print(temp.describe())
#ax = sns.kdeplot("1_major_variant_frequency", data=temp)
lm=sns.kdeplot(np.log(temp["1_major_variant_frequency"]), label="TD12")

temp = overlay_table_concat.loc[ (overlay_table_concat['sample']!= "TD9") & (overlay_table_concat['sample']!= "TD12") & (overlay_table_concat['sample']!= "TD51") ]
del temp['sample']
del temp['Major_variant_frequency_quality_corrected']
del temp['Position']
#print(temp.describe())
#ax = sns.kdeplot("1_major_variant_frequency", data=temp)
lm=sns.kdeplot(np.log(temp["1_major_variant_frequency"]), label="TD24")

temp = overlay_table_concat.loc[ (overlay_table_concat['sample']!= "TD9") & (overlay_table_concat['sample']!= "TD12") & (overlay_table_concat['sample']!= "TD24") ]
del temp['sample']
del temp['Major_variant_frequency_quality_corrected']
del temp['Position']
#print(temp.describe())
#ax = sns.kdeplot("1_major_variant_frequency", data=temp)
lm=sns.kdeplot(np.log(temp["1_major_variant_frequency"]), label="TD51")


/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:10: RuntimeWarning: divide by zero encountered in log
  # Remove the CWD from sys.path while we load stuff.
/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:23: RuntimeWarning: divide by zero encountered in log
/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:31: RuntimeWarning: divide by zero encountered in log
/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:39: RuntimeWarning: divide by zero encountered in log

In the analyses above, it would seem that the diversity, i.e. the amount of polymorphism, has increased a bit, as in Acevedo et al. 2014. However, presumably because we do not use Cirseq, we have a bimodal distribution of diversity, with possibly first a peak corresponding to sequencing errors, and second a peak corresponding to variants that do rise in frequency through time. In support of this hypothesis, the only CirSeq data we have, analyzed at the top of this document, does not show this bimodal distribution, and in the graphs above for the replicate A and the TLR3 treatment condition, only the variants with high frequency seem to increase in frequency through time. In the replicate D, it is not clear because the increase seems to be in both peaks.

Diversity through time plot, replicate A

Here we do the same type of analysis, but looking at all time points, not just 3 and 51.


In [52]:
f, ax = plt.subplots(figsize=(10, 7))
ax.set(yscale="log")
sns.violinplot(x='sample', y="1_major_variant_frequency", data=overlay_table_concat,  ax=ax, bw=0.2)

# giving title to the plot
plt.title("Diversity through time");

medians = overlay_table_concat.groupby(['sample'], sort=False)['1_major_variant_frequency'].median()
print("Median values of the diversity per sample:" )
print(medians)
means = overlay_table_concat.groupby(['sample'], sort=False)['1_major_variant_frequency'].mean()
print("Mean values of the diversity per sample:" )
print(means)


Median values of the diversity per sample:
sample
TD9     0.004507
TD12    0.004861
TD24    0.004768
TD51    0.004543
Name: 1_major_variant_frequency, dtype: float64
Mean values of the diversity per sample:
sample
TD9     0.004674
TD12    0.005343
TD24    0.004901
TD51    0.005073
Name: 1_major_variant_frequency, dtype: float64

The increase is no longer obvious, certainly not monotonous. If anything, what might be happening is that first diversity decreases from time points 3 to 9, then increases. If this is true, this could be compatible with a selective sweep in the first days, so that only some haplotypes carrying the variants with the high fitness increase in frequency.

Same thing, Sample D


In [53]:
overlay_table =  pd.DataFrame ({'Position':DD3_D['Position'], 'DD3_D':DD3_D ['Major_variant_frequency_quality_corrected'], 'DD6_D':DD6_D ['Major_variant_frequency_quality_corrected'],'DD9_D':DD9_D ['Major_variant_frequency_quality_corrected'],'DD12_D':DD12_D ['Major_variant_frequency_quality_corrected'],'DD24_D':DD24_D ['Major_variant_frequency_quality_corrected']})
siz = len(DD3_D ['Position'])
sample = siz*["DD3D"]+siz*["DD6D"]+siz*["DD9D"]+siz*["DD12D"]+siz*["DD24D"]

overlay_table_concat = pd.DataFrame ({'Position':pd.concat([DD3_D['Position'],DD6_D['Position'],DD9_D['Position'],DD12_D['Position'],DD24_D['Position']]), 'Major_variant_frequency_quality_corrected':pd.concat([DD3_D ['Major_variant_frequency_quality_corrected'],DD6_D ['Major_variant_frequency_quality_corrected'], DD9_D ['Major_variant_frequency_quality_corrected'],DD12_D ['Major_variant_frequency_quality_corrected'], DD24_D ['Major_variant_frequency_quality_corrected'] ]), 'sample':sample})

overlay_table_concat['1_major_variant_frequency'] = 1.0 - overlay_table_concat['Major_variant_frequency_quality_corrected']

In [54]:
sns.lmplot( x="Position", y="Major_variant_frequency_quality_corrected", data=overlay_table_concat, fit_reg=False, hue='sample', legend=False, size=7, aspect=2)
#plt.legend(loc='lower right')
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plt.ylabel("Frequency of major variant")
plot_positions()
#plot_genes()


Diversity through time plot, replicate D


In [55]:
f, ax = plt.subplots(figsize=(10, 7))
ax.set(yscale="log")
sns.violinplot(x='sample', y="1_major_variant_frequency", data=overlay_table_concat,  ax=ax, bw=0.2)

# giving title to the plot
plt.title("Diversity through time");

medians = overlay_table_concat.groupby(['sample'], sort=False)['1_major_variant_frequency'].median()
print("Median values of the diversity per sample:" )
print(medians)
means = overlay_table_concat.groupby(['sample'], sort=False)['1_major_variant_frequency'].mean()
print("Mean values of the diversity per sample:" )
print(means)


Median values of the diversity per sample:
sample
DD3D     0.004186
DD6D     0.004450
DD9D     0.004750
DD12D    0.004845
DD24D    0.004713
Name: 1_major_variant_frequency, dtype: float64
Mean values of the diversity per sample:
sample
DD3D     0.004531
DD6D     0.004935
DD9D     0.005164
DD12D    0.005557
DD24D    0.005362
Name: 1_major_variant_frequency, dtype: float64

In this case, it may be that the diversity increases then reaches some sort of plateau at about a median of 0.0047-0.0048.

Same thing, sample E


In [56]:
siz = len(DD6_E ['Position'])
sample = siz*["DD6E"]+siz*["DD9E"]

overlay_table_concat = pd.DataFrame ({'Position':pd.concat([DD6_E['Position'],DD9_E['Position']]), 'Major_variant_frequency_quality_corrected':pd.concat([DD6_E ['Major_variant_frequency_quality_corrected'],DD9_E ['Major_variant_frequency_quality_corrected']]), 'sample':sample})
overlay_table_concat['1_major_variant_frequency'] = 1.0 - overlay_table_concat['Major_variant_frequency_quality_corrected']

In [57]:
sns.lmplot( x="Position", y="Major_variant_frequency_quality_corrected", data=overlay_table_concat, fit_reg=False, hue='sample', legend=False, size=7, aspect=2)
plt.ylabel("Frequency of major variant")
#plt.legend(loc='lower right')
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plot_positions()
#plot_genes()


Diversity through time plot, replicate E


In [58]:
f, ax = plt.subplots(figsize=(10, 7))
ax.set(yscale="log")
sns.violinplot(x='sample', y="1_major_variant_frequency", data=overlay_table_concat,  ax=ax, bw=0.2)

# giving title to the plot
plt.title("Diversity through time");

medians = overlay_table_concat.groupby(['sample'], sort=False)['1_major_variant_frequency'].median()
print("Median values of the diversity per sample:" )
print(medians)
means = overlay_table_concat.groupby(['sample'], sort=False)['1_major_variant_frequency'].mean()
print("Mean values of the diversity per sample:" )
print(means)


Median values of the diversity per sample:
sample
DD6E    0.004326
DD9E    0.004504
Name: 1_major_variant_frequency, dtype: float64
Mean values of the diversity per sample:
sample
DD6E    0.004740
DD9E    0.004837
Name: 1_major_variant_frequency, dtype: float64

Although we only have two time points, the results are consistent with an increase in the first few days.

Same thing, TLR3 activation


In [59]:
overlay_table =  pd.DataFrame ({'Position':TD9_A['Position'], 'TD9_A':TD9_A ['Major_variant_frequency_quality_corrected'], 'TD12_A':TD12_A ['Major_variant_frequency_quality_corrected'],'TD24_A':TD24_A ['Major_variant_frequency_quality_corrected'], 'TD51_A':TD51_A ['Major_variant_frequency_quality_corrected']})
siz = len(TD9_A ['Position'])
sample = siz*["TD9A"]+siz*["TD12A"]+siz*["TD24A"]+siz*["TD51A"]

overlay_table_concat = pd.DataFrame ({'Position':pd.concat([TD9_A['Position'],TD12_A['Position'],TD24_A['Position'],TD51_A['Position']]), 'Major_variant_frequency_quality_corrected':pd.concat([TD9_A['Major_variant_frequency_quality_corrected'],TD12_A['Major_variant_frequency_quality_corrected'],TD24_A['Major_variant_frequency_quality_corrected'],TD51_A['Major_variant_frequency_quality_corrected'] ]), 'sample':sample})
overlay_table_concat['1_major_variant_frequency'] = 1.0 - overlay_table_concat['Major_variant_frequency_quality_corrected']

In [60]:
sns.lmplot( x="Position", y="Major_variant_frequency_quality_corrected", data=overlay_table_concat, fit_reg=False, hue='sample', legend=False, size=7, aspect=2)
plt.ylabel("Frequency of major variant")
#plt.legend(loc='lower right')
plt.legend(bbox_to_anchor=(1.05, 1), loc=2, borderaxespad=0.)
plot_positions()
#plot_genes()


Diversity through time plot, TLR3 activation


In [61]:
f, ax = plt.subplots(figsize=(10, 7))
ax.set(yscale="log")
sns.violinplot(x='sample', y="1_major_variant_frequency", data=overlay_table_concat,  ax=ax, bw=0.2)

# giving title to the plot
plt.title("Diversity through time");

medians = overlay_table_concat.groupby(['sample'], sort=False)['1_major_variant_frequency'].median()
print("Median values of the diversity per sample:" )
print(medians)
means = overlay_table_concat.groupby(['sample'], sort=False)['1_major_variant_frequency'].mean()
print("Mean values of the diversity per sample:" )
print(means)


Median values of the diversity per sample:
sample
TD9A     0.004507
TD12A    0.004861
TD24A    0.004768
TD51A    0.004543
Name: 1_major_variant_frequency, dtype: float64
Mean values of the diversity per sample:
sample
TD9A     0.004674
TD12A    0.005343
TD24A    0.004901
TD51A    0.005073
Name: 1_major_variant_frequency, dtype: float64

The diversity initially increases, but then it may be decreasing between 24 and 51. Not clear with few time points and no replicate.

Analysis of coverages: overlay and correlations


In [62]:
# construction of the data table
siz = len(DD3_A ['Coverage'])
sample = siz*["DD3A"]+siz*["DD6A"]+siz*["DD9A"]+siz*["DD12A"]+siz*["DD24A"]+siz*["DD51A"]
overlay_table_concat_DDA = pd.DataFrame ({'Position':pd.concat([DD3_A['Position'],DD6_A['Position'],DD9_A['Position'],DD12_A['Position'],DD24_A['Position'],DD51_A['Position']]), 'Coverage':pd.concat([DD3_A ['Coverage'],DD6_A ['Coverage'], DD9_A ['Coverage'],DD12_A ['Coverage'], DD24_A ['Coverage'], DD51_A ['Coverage'] ]), 'sample':sample})

siz = len(DD3_D ['Coverage'])
sample = siz*["DD3D"]+siz*["DD6D"]+siz*["DD9D"]+siz*["DD12D"]+siz*["DD24D"]
overlay_table_concat_DDD = pd.DataFrame ({'Position':pd.concat([DD3_D['Position'],DD6_D['Position'],DD9_D['Position'],DD12_D['Position'],DD24_D['Position']]), 'Coverage':pd.concat([DD3_D ['Coverage'],DD6_D ['Coverage'], DD9_D ['Coverage'],DD12_D ['Coverage'], DD24_D ['Coverage'] ]), 'sample':sample})

siz = len(DD6_E ['Coverage'])
sample = siz*["DD6E"]+siz*["DD9E"]
overlay_table_concat_DDE = pd.DataFrame ({'Position':pd.concat([DD6_E['Position'],DD9_E['Position']]), 'Coverage':pd.concat([DD6_E ['Coverage'],DD9_E ['Coverage']]), 'sample':sample})

siz = len(TD9_A ['Coverage'])
sample = siz*["TD9A"]+siz*["TD12A"]+siz*["TD24A"]+siz*["TD51A"]
overlay_table_concat_TD = pd.DataFrame ({'Position':pd.concat([TD9_A['Position'],TD12_A['Position'],TD24_A['Position'],TD51_A['Position']]), 'Coverage':pd.concat([TD9_A['Coverage'],TD12_A['Coverage'],TD24_A['Coverage'],TD51_A['Coverage'] ]), 'sample':sample})

overlay_table_concat=pd.concat([overlay_table_concat_DDA,overlay_table_concat_DDD, overlay_table_concat_DDE, overlay_table_concat_TD])

In [63]:
class MathTextSciFormatter(mticker.Formatter):
    def __init__(self, fmt="%1.2e"):
        self.fmt = fmt
    def __call__(self, x, pos=None):
        s = self.fmt % x
        decimal_point = '.'
        positive_sign = '+'
        tup = s.split('e')
        significand = tup[0].rstrip(decimal_point)
        sign = tup[1][0].replace(positive_sign, '')
        exponent = tup[1][1:].lstrip('0')
        if exponent:
            exponent = '10^{%s%s}' % (sign, exponent)
        if significand and exponent:
            s =  r'%s{\times}%s' % (significand, exponent)
        else:
            s =  r'%s%s' % (significand, exponent)
        return "${}$".format(s)


markers = ['x','o','v','^','<', '+', 'x','o','v','^','<', '+', 'x','o','v','^','<']

sns.lmplot( x="Position", y="Coverage", data=overlay_table_concat, fit_reg=False, hue='sample', legend=False, size=7, aspect=2, lowess=True, markers=markers, scatter_kws={"s": 20})

# Format with 2 decimal places
plt.gca().yaxis.set_major_formatter(MathTextSciFormatter("%1.2e"))

#plt.legend(loc='lower right')
plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3, mode="expand", borderaxespad=0., ncol=4)

plot_positions()
#plot_genes()



In [64]:
siz = len(DD3_A ['Coverage'])
table_for_correlations = pd.DataFrame ({'DD3A':DD3_A ['Coverage'],'DD6_A':DD6_A['Coverage'], 'DD9_A':DD9_A ['Coverage'], 'DD12_A':DD12_A ['Coverage'], 'DD24_A':DD24_A ['Coverage'], 'DD51_A':DD51_A ['Coverage'],
                                        'DD3_D':DD3_D ['Coverage'],'DD6_D':DD6_D ['Coverage'], 'DD9_D':DD9_D ['Coverage'],'DD12_D':DD12_D ['Coverage'], 'DD24_D':DD24_D ['Coverage'],
                                        'DD6_E':DD6_E ['Coverage'], 'DD9_E':DD9_E ['Coverage'],
                                        'TD9_A':TD9_A['Coverage'],'TD12_A':TD12_A['Coverage'],'TD24_A':TD24_A['Coverage'],'TD51_A':TD51_A['Coverage'] })

In [65]:
sns.pairplot(table_for_correlations, vars=['DD3A','DD3_D','DD6_E','TD9_A'], kind="scatter")


Out[65]:
<seaborn.axisgrid.PairGrid at 0x7fa6324dfd30>

In [66]:
sns.pairplot(table_for_correlations, vars=['DD51_A','DD24_D','DD9_E','TD51_A'], kind="scatter")


Out[66]:
<seaborn.axisgrid.PairGrid at 0x7fa6324df5c0>

Coverage values between samples seem to be quite correlated, although I have not plotted all pairwise correlations.

Overlays of the major variant frequency per time point, across replicates

Time point 3


In [67]:
# construction of the data table
siz = len(DD3_A ['Coverage'])
sample = len(DD3_A ['Coverage'])*["DD3A"]+len(DD3_D ['Coverage'])*["DD3D"]
overlay_table_concat_DD3 = pd.DataFrame ({'Position':pd.concat([DD3_A['Position'],DD3_D['Position']]), 'Major_variant_frequency_quality_corrected':pd.concat([DD3_A ['Major_variant_frequency_quality_corrected'],DD3_D ['Major_variant_frequency_quality_corrected']]), 'sample':sample})

In [68]:
sns.lmplot( x="Position", y="Major_variant_frequency_quality_corrected", data=overlay_table_concat_DD3, fit_reg=False, hue='sample', legend=False, size=7, aspect=2, lowess=True)
plt.legend(loc='lower right')
plot_positions()
plot_genes()


Time point 6


In [69]:
# construction of the data table
siz = len(DD6_A ['Coverage'])
sample = len(DD6_A ['Coverage'])*["DD6A"]+len(DD6_D ['Coverage'])*["DD6D"]+len(DD6_E ['Coverage'])*["DD6E"]
overlay_table_concat_DD6 = pd.DataFrame ({'Position':pd.concat([DD6_A['Position'],DD6_D['Position'],DD6_E['Position']]), 'Major_variant_frequency_quality_corrected':pd.concat([DD6_A ['Major_variant_frequency_quality_corrected'],DD6_D ['Major_variant_frequency_quality_corrected'],DD6_E ['Major_variant_frequency_quality_corrected']]), 'sample':sample})

In [70]:
sns.lmplot( x="Position", y="Major_variant_frequency_quality_corrected", data=overlay_table_concat_DD6, fit_reg=False, hue='sample', legend=False, size=7, aspect=2, lowess=True)
plt.legend(loc='lower right')
plot_positions()
plot_genes()