Analysis of the libraries compared to the clone sequence

Here we analyse the libraries with respect to the clone sequence in which mutations will be introduced. We print positions that differ from the clone and rise in frequency between the first and the last time points.



In [1]:

    
import pandas as pd
import matplotlib.pyplot as plt
from pylab import rcParams
import seaborn as sns
from array import array
import numpy as np
from scipy.stats import ttest_ind
from scipy.stats import linregress
%matplotlib inline

Obtaining the sequence annotation



In [2]:

    
begins=[]
ends=[]
names =[]
with open ("sequence.gb") as f:
    in_pep = False
    for l in f:
        if "mat_peptide" in l:
            begins.append(int(l.split()[1].split("..")[0]))
            ends.append(int(l.split()[1].split("..")[1]))
            in_pep = True
        elif in_pep :
            names.append(l.split("=")[1])
            in_pep = False
            
print(begins)
print(ends)
print(names)









    



[108, 474, 753, 978, 2490, 3546, 4224, 4614, 6465, 6846, 6915, 7668]
[473, 752, 977, 2489, 3545, 4223, 4613, 6464, 6845, 6914, 7667, 10376]
['"capsid"\n', '"propeptide"\n', '"membrane"\n', '"envelope"\n', '"NS1"\n', '"NS2A"\n', '"NS2B"\n', '"NS3"\n', '"NS4A"\n', '"2K"\n', '"NS4B"\n', '"NS5"\n']

Obtaining the clone sequence



In [3]:

    
file = "cloneSequence/SP6-ZIKV_seq_only.txt"
clone = ""
with open (file) as f:
    for l in f:
        if ">" in l:
            pass
        else:
            clone +=l.strip()

Functions to plot interesting positions and gene boundaries



In [4]:

    
# Interesting positions
positions=[316,1670,1785,2340,5935,7172,8449,9165]
def plot_positions():
    for x in positions:
        plt.axvline(x=x, linewidth=1, linestyle=':')
        
def plot_genes():
    for i in range(len(begins)):
        plt.plot([begins[i], begins[i]], [0.99,1.0], linewidth=2, linestyle='-', color="black")
        if i%2==0:
            plt.text (begins[i] + ((ends[i] - begins[i])/10), 1.005, (names[i].replace('"', ''))[0:3], size='xx-small')
        else:
            plt.text (begins[i] + ((ends[i] - begins[i])/10), 1.015, (names[i].replace('"', ''))[0:3], size='xx-small')
    plt.plot([ends[-1], ends[-1]], [0.99,1.0], linewidth=2, linestyle='-', color="black")

Functions to detect variants that differ from the clone and increase in frequency

Here we assume that the major variant is the state in the clone sequence, and we attempt to detect variants that are not in the clone but that increase through time.



In [5]:

    
def is_increasing(minor_frequencies):
    #print(minor_frequencies)
    previous = minor_frequencies[0]
    for m in range(1,len(minor_frequencies)):
        if previous < minor_frequencies[m]:
            #print(str(previous) + " < " + str(minor_frequencies[m]))
            previous = minor_frequencies[m]
        else:
            return False
    return True

def get_variant_frequency(variant, table, i):
    sum_of_bases = table['As_quality_corrected'][i]+table['Cs_quality_corrected'][i]+table['Gs_quality_corrected'][i]+table['Ts_quality_corrected'][i]+table['Ns_quality_corrected'][i]
    if variant == "A":
        return table["As_quality_corrected"][i] / sum_of_bases
    elif variant == "C":
        return table["Cs_quality_corrected"][i] / sum_of_bases
    elif variant == "G":
        return table["Gs_quality_corrected"][i] / sum_of_bases
    elif variant == "T":
        return table["Ts_quality_corrected"][i] / sum_of_bases
    else:
        return np.nan
        

def get_increasing_variants(tables, clone):
    num_tables = len(tables)
    first = tables[0]
    last = tables[num_tables-1]
    major = ""
    minor = ""
    major_frequencies = array('d',[0.0]*num_tables)
    minor_frequencies = array('d',[0.0]*num_tables)
    increasingVariants = dict()
    for i in first["Position"]:
        major = clone[i] #first["Major_variant"][i]
        #print(last['Major_variant_frequency_quality_corrected'][i])
        major_frequencies[0] = get_variant_frequency(major, first, i) 
        if major == last["Major_variant"][i]:
            minor = last["Second_variant"][i]
        else:
            minor = last["Major_variant"][i]
        minor_frequencies[0] = get_variant_frequency(minor, first, i)
        for table_id in range(1, num_tables):
            major_frequencies[table_id] = get_variant_frequency(major, tables[table_id], i)
            minor_frequencies[table_id] = get_variant_frequency(minor, tables[table_id], i)
        if is_increasing(minor_frequencies):
            increasingVariants[i] = [major, minor, major_frequencies.tolist(), minor_frequencies.tolist()]
    return increasingVariants

def print_variants(dict_variants):
    print("Position\tclone base\tincreasing variant\tFinal frequency")
    for k in dict_variants.keys():
        print(str(k)+"\t"+dict_variants[k][0]+"\t"+dict_variants[k][1]+"\t"+str(dict_variants[k][3][-1]))

Reading all data



In [6]:

    
# CirSeq initial sample
cirseq = pd.read_csv ("HV5GLBCXY_ZIKV_17s006139-1-1_DREUX_lane1CirseqD3_1_sequence.txt.assembled.fastq_mapped_AA.csv", na_values=" -nan")



In [7]:

    
# Control runs, replicate A
DD3_A = pd.read_csv ("HV5GLBCXY_ZIKV_17s006139-1-1_DREUX_lane1DD3A_1_sequence.txt.assembled.fastq_mapped_AA.csv", na_values=" -nan")
DD6_A = pd.read_csv ("HV5GLBCXY_ZIKV_17s006139-1-1_DREUX_lane1DD6A_1_sequence.txt.assembled.fastq_mapped_AA.csv", na_values=" -nan")
DD9_A = pd.read_csv ("HV5GLBCXY_ZIKV_17s006139-1-1_DREUX_lane1DD9A_1_sequence.txt.assembled.fastq_mapped_AA.csv", na_values=" -nan")
DD12_A = pd.read_csv ("HV5GLBCXY_ZIKV_17s006139-1-1_DREUX_lane1DD12A_1_sequence.txt.assembled.fastq_mapped_AA.csv", na_values=" -nan")
DD24_A = pd.read_csv ("HV5GLBCXY_ZIKV_17s006139-1-1_DREUX_lane1DD24A_1_sequence.txt.assembled.fastq_mapped_AA.csv", na_values=" -nan")
DD51_A = pd.read_csv ("HV5GLBCXY_ZIKV_17s006139-1-1_DREUX_lane1DD51A_1_sequence.txt.assembled.fastq_mapped_AA.csv", na_values=" -nan")
DD51_A_no_reamp = pd.read_csv ("HV5GLBCXY_ZIKV_17s006139-1-1_DREUX_lane1DD51Anoreamplification_1_sequence.txt.assembled.fastq_mapped_AA.csv", na_values=" -nan")



In [8]:

    
# Control runs, replicate D
DD3_D = pd.read_csv ("HV5GLBCXY_ZIKV_17s006139-1-1_DREUX_lane1DD3D_1_sequence.txt.assembled.fastq_mapped_AA.csv")
DD6_D = pd.read_csv ("HV5GLBCXY_ZIKV_17s006139-1-1_DREUX_lane1DD6D_1_sequence.txt.assembled.fastq_mapped_AA.csv")
DD9_D = pd.read_csv ("HV5GLBCXY_ZIKV_17s006139-1-1_DREUX_lane1DD9D_1_sequence.txt.assembled.fastq_mapped_AA.csv")
DD12_D = pd.read_csv ("HV5GLBCXY_ZIKV_17s006139-1-1_DREUX_lane1DD12D_1_sequence.txt.assembled.fastq_mapped_AA.csv")
DD24_D = pd.read_csv ("HV5GLBCXY_ZIKV_17s006139-1-1_DREUX_lane1DD24D_1_sequence.txt.assembled.fastq_mapped_AA.csv")



In [9]:

    
# Control runs, replicate E
DD6_E = pd.read_csv ("HV5GLBCXY_ZIKV_17s006139-1-1_DREUX_lane1DD6E_1_sequence.txt.assembled.fastq_mapped_AA.csv")
DD9_E = pd.read_csv ("HV5GLBCXY_ZIKV_17s006139-1-1_DREUX_lane1DD9E_1_sequence.txt.assembled.fastq_mapped_AA.csv")



In [10]:

    
# TLR3 activation runs, replicate A
TD9_A = pd.read_csv ("HV5GLBCXY_ZIKV_17s006139-1-1_DREUX_lane1TD9A_1_sequence.txt.assembled.fastq_mapped_AA.csv")
TD12_A = pd.read_csv ("HV5GLBCXY_ZIKV_17s006139-1-1_DREUX_lane1TD12A_1_sequence.txt.assembled.fastq_mapped_AA.csv")
TD24_A = pd.read_csv ("HV5GLBCXY_ZIKV_17s006139-1-1_DREUX_lane1TD24A_1_sequence.txt.assembled.fastq_mapped_AA.csv")
TD51_A = pd.read_csv ("HV5GLBCXY_ZIKV_17s006139-1-1_DREUX_lane1TD51A_1_sequence.txt.assembled.fastq_mapped_AA.csv")



In [11]:

    
#DD3_A.describe(include='all')

Positions that increase in frequency

Control, replicate A



In [12]:

    
tables_A = [DD3_A, DD6_A, DD9_A, DD12_A, DD24_A, DD51_A]
increasing_A = get_increasing_variants(tables_A, clone)
print("There are "+str(len(increasing_A))+" positions that rise in frequency.")
print("Those are:")
print_variants(increasing_A)









    



/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:15: RuntimeWarning: invalid value encountered in double_scalars
  from ipykernel import kernelapp as app
/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:19: RuntimeWarning: invalid value encountered in double_scalars
/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:21: RuntimeWarning: invalid value encountered in double_scalars






    



There are 92 positions that rise in frequency.
Those are:
Position	clone base	increasing variant	Final frequency
53	T	C	0.0014687147643982735
55	G	T	0.007534085284725918
138	T	C	0.001922840821068939
165	G	T	0.006671351295565951
173	C	T	0.003006519784370717
316	T	C	0.9257185433962264
332	G	T	0.0065362685648320765
357	T	C	0.002416925215029425
491	G	T	0.006092446714592004
824	T	C	0.0035788258135997075
901	T	C	0.0016021462336337357
1124	G	T	0.005141357751182855
1471	G	T	0.00665038967995197
1500	G	T	0.006259729268570128
1552	G	T	0.00582413787633224
1670	G	A	0.9422872506633152
1883	G	T	0.005708083729311799
1951	A	T	0.0010857796411378254
2039	G	T	0.006254807609563537
2101	G	T	0.004561467672983693
2102	C	T	0.0036324873615132206
2235	G	T	0.00525068769414528
2340	C	T	0.9517702913964549
2541	G	T	0.006747314640043678
2702	G	T	0.005630456156895833
2792	C	T	0.003261293410844263
2804	G	T	0.004986371013878872
2811	T	C	0.0015264351528296522
3087	A	C	0.0017289634956359449
3118	G	T	0.006253053863947506
3122	C	T	0.003243195555351485
3129	G	T	0.0067574983525088015
3563	G	T	0.007208932808283307
3604	G	T	0.00710220693807605
4001	G	T	0.0055527696113273056
4077	G	T	0.007704346537189801
4099	G	T	0.006331382611502932
4307	G	T	0.006097934177532599
4417	G	T	0.005203936955441444
4421	G	T	0.004423971315122885
4783	G	T	0.005009771458194474
4837	T	C	0.0036507001701765584
4900	G	T	0.00501963133097762
4904	T	C	0.0012587202958035525
5010	G	T	0.006879044433884693
5170	G	T	0.005479148374369285
5293	T	C	0.0013733190264127765
5321	A	T	0.001112200161501852
5536	C	T	0.0047133850272151325
5537	G	T	0.0066499016408061344
5542	G	T	0.006110399622712554
5553	T	C	0.0014656293229840449
5696	T	G	0.0028415076416638658
5782	T	C	0.002668610997246441
5817	A	C	0.003030378974067433
5880	T	C	0.0018187709472023443
5901	T	A	0.0011123867831042838
6397	G	T	0.006138143552213597
6424	G	T	0.005687857375959388
6459	A	C	0.001998318041912197
6525	T	C	0.002099409643570953
6724	T	C	0.0018246651368251958
6735	G	T	0.0046651531790311
6751	G	T	0.005824683122668878
6804	T	C	0.0011336708032008219
6866	G	T	0.0062069921569629935
6891	G	T	0.006617866758241758
6993	G	T	0.007441144946384966
7251	T	C	0.002121198377468238
7305	C	T	0.002535200586128297
7338	G	T	0.00710603405818938
8219	T	C	0.001508430173775671
8449	T	C	0.9278776890748103
8466	T	C	0.02216480910037949
8826	G	T	0.006678434157029546
8850	G	T	0.006137409467929184
8952	A	C	0.002647146518482474
9079	C	T	0.0036167201834313594
9142	C	T	0.002983858309884723
9251	G	T	0.004899158721768481
9297	G	T	0.005444645678862284
9607	C	T	0.003108899778153903
9978	G	T	0.005807032348341425
10025	G	T	0.0044327439115236945
10153	G	T	0.003973514219896262
10360	T	C	0.0026532672370486775
10405	G	T	0.0046594830298306526
10433	G	T	0.004945853785912818
10457	G	T	0.006337990309459244
10618	G	T	0.007520543473961767
10630	G	T	0.0054867526174787705
10633	G	T	0.005026616402357473






    



/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:17: RuntimeWarning: invalid value encountered in double_scalars

Control, replicate D



In [13]:

    
tables_D = [DD3_D, DD6_D, DD9_D, DD12_D, DD24_D]
increasing_D = get_increasing_variants(tables_D, clone)
print("There are "+str(len(increasing_D))+" positions that rise in frequency.")
print("Those are:")
print_variants(increasing_D)









    



/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:15: RuntimeWarning: invalid value encountered in double_scalars
  from ipykernel import kernelapp as app
/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:19: RuntimeWarning: invalid value encountered in double_scalars
/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:21: RuntimeWarning: invalid value encountered in double_scalars






    



There are 318 positions that rise in frequency.
Those are:
Position	clone base	increasing variant	Final frequency
40	T	G	0.0019018145805032665
138	T	G	0.0017308345476746512
164	G	T	0.0028747315274068423
176	G	T	0.003746452972916831
188	G	T	0.032248881943154374
207	C	T	0.005254698946753452
243	T	G	0.0025127258345428154
254	A	C	0.0012103183990453327
263	T	C	0.0014386049439938202
295	G	T	0.006107744558710584
298	T	C	0.001168757010650079
433	T	G	0.0029516184598376957
466	T	C	0.0024415496100519923
531	G	T	0.005232150497286363
533	G	A	0.08506827845455946
535	G	T	0.0047124831467124624
548	C	A	0.00493994243614136
570	A	C	0.0023983467340025367
599	C	A	0.0038547210141864495
664	C	A	0.003402638792391993
703	G	T	0.0051156132164690785
731	G	T	0.004592770088631973
740	A	C	0.0015919764628898891
796	G	T	0.003995424416014257
860	T	G	0.0018815363575994026
957	T	G	0.00163585699634353
997	C	A	0.004407071762377561
1001	A	G	0.0013013758799981594
1110	T	G	0.0018698757108824436
1118	A	C	0.002007284680139468
1179	T	G	0.0016005767583004392
1284	A	C	0.0015901763364048385
1305	A	C	0.0017960425537715018
1346	A	C	0.001356648827492936
1355	G	T	0.004336778174734599
1373	G	T	0.004709751477455881
1390	G	T	0.005192604104378953
1434	T	G	0.0020804375743845166
1501	A	C	0.002654211472365691
1545	G	T	0.00440933713967414
1556	A	C	0.0019001064164434387
1623	A	C	0.001816804714182995
1635	A	C	0.001800065615319628
1748	C	A	0.004927673831020078
1784	G	T	0.005044935305758472
1809	A	C	0.002118775744511351
1916	T	C	0.393273332972583
1937	A	C	0.0023267202986575887
1968	A	C	0.0018202668830734226
1973	G	T	0.004449331243046456
2042	G	T	0.005798259416551
2044	G	T	0.005006377954636109
2048	T	C	0.0016834749447659086
2069	A	G	0.2887551102587314
2086	G	T	0.005087511188501996
2094	A	C	0.001838781341752427
2105	G	T	0.005236468002725754
2107	A	C	0.0025605623944727628
2121	T	G	0.0028933955982864057
2185	T	C	0.09595135553773552
2187	G	T	0.004034601809788786
2200	A	G	0.004311296557410842
2233	G	T	0.003350855424036046
2244	T	G	0.0027810142606833448
2249	G	T	0.005277779568331397
2280	G	T	0.004975433706391556
2287	T	G	0.00209308620151165
2288	C	A	0.0052514276853611866
2361	G	T	0.0050964760534909035
2375	C	A	0.0043082365805168985
2402	G	T	0.005660311589221887
2406	T	G	0.0017003598868962403
2426	A	G	0.0010217074055849428
2427	T	G	0.0025944923307369685
2433	T	G	0.0022372360795904363
2435	A	G	0.0015309848429000267
2446	C	A	0.004688323706835129
2450	G	T	0.005944345927076633
2474	A	C	0.001749327920820513
2510	T	C	0.001979800768162131
2511	T	G	0.002255276147202096
2518	G	T	0.003885431550548059
2521	G	T	0.004460617559946444
2526	C	A	0.004385299055940686
2534	G	T	0.004350590178237081
2572	G	T	0.004990214346344899
2607	G	T	0.005101364937904858
2698	A	C	0.001013791667967896
2707	G	T	0.0044310382167526315
2735	G	T	0.004390610007456436
2754	T	G	0.0020864704459267035
2757	T	G	0.0027371800701051583
2804	G	T	0.003852715152624612
2816	G	T	0.004484453657680837
2820	T	G	0.0028499994255646935
2824	C	A	0.006833416444648599
2828	G	T	0.005239220278029617
2833	G	T	0.004369636635917095
2834	A	C	0.002417630421813475
2843	G	T	0.004880441730941658
2919	C	A	0.004158668770980518
2925	A	C	0.0027269980022197556
2930	A	C	0.0026222460134042056
2973	T	G	0.0024084260456041944
2989	C	A	0.0051446633252887975
2991	G	T	0.00570568865652031
3014	T	G	0.0031771380492495636
3039	T	G	0.0030488481287730125
3080	C	A	0.004312312298677792
3123	T	G	0.0016877884796171268
3242	A	C	0.002385986653464284
3274	C	A	0.005043234478880695
3381	T	G	0.0015957298463773687
3391	C	A	0.0047330242728920325
3397	A	G	0.0037040838200660844
3523	A	G	0.002355818347744195
3541	T	G	0.0017750882450370464
3546	G	T	0.0049322949995100765
3595	G	T	0.005359673112180589
3628	G	T	0.004061607236990737
3722	T	G	0.0013548613168685702
3729	G	T	0.004937938068094019
3738	T	G	0.002016696121937103
3743	G	T	0.0044457957100009085
3754	T	G	0.002102644511676279
3837	A	C	0.0015554424313391933
3872	G	T	0.006095923917185291
3883	T	C	0.0015445687790228845
3926	C	A	0.004234732057525145
3942	C	A	0.0039974732304327715
3968	A	C	0.0021085958821484797
3976	T	G	0.0016148094762092132
4018	G	T	0.005441041970802338
4062	G	T	0.005386113532004775
4116	T	G	0.0024685261659017944
4160	G	T	0.004444534548789039
4164	G	T	0.00375967765000088
4168	G	T	0.004675843356577826
4179	T	G	0.0025058879130987056
4181	A	C	0.0024610282302082874
4190	G	T	0.006342749769040713
4197	T	G	0.0028035435611710423
4209	G	T	0.005274898516845103
4221	G	T	0.003988841168416248
4273	G	T	0.0050496194567062795
4355	T	G	0.012331091795292237
4475	T	G	0.0025793201830994405
4483	G	T	0.0058630033772339085
4536	C	A	0.00433595330533693
4537	C	A	0.005151219329752828
4576	T	G	0.002626149794441126
4606	A	G	0.00673347984347728
4634	C	A	0.0049976211248974126
4706	G	T	0.005576790739222119
4799	G	T	0.00448067339178198
4818	G	T	0.005288859671709044
4879	G	T	0.0061008514212113315
4881	A	C	0.0020181372715708124
4913	C	A	0.004506133837990243
4981	T	G	0.0015003969404857936
5119	G	T	0.004277056139245146
5126	G	T	0.00353838603459134
5159	A	C	0.001647826551376403
5198	C	A	0.004472288451463213
5269	T	G	0.001768790278378768
5278	A	G	0.0010525426767744766
5300	G	T	0.004351028501143114
5388	A	C	0.002501610197483447
5466	A	C	0.0015252583757378387
5595	A	C	0.002201283130468788
5598	C	A	0.00427228648104663
5600	A	C	0.0020157515718239546
5612	A	C	0.002080241271655967
5636	G	T	0.005691833989852835
5638	G	T	0.005148247673456851
5640	G	T	0.005183543915408594
5660	G	T	0.006422811767694176
5693	T	G	0.0017801726759064539
5697	T	G	0.0020064544731911407
5763	T	G	0.001832860051724766
5776	A	C	0.002224432530913877
5812	T	C	0.00507705381553585
5921	G	T	0.004537281007367108
5969	G	T	0.005027674367893671
5971	T	G	0.0014192370329785017
5992	A	C	0.0013065231850582421
6020	G	T	0.004437915726396715
6101	A	C	0.0017368710139615475
6105	A	C	0.002745938976548118
6110	C	A	0.005652109234617677
6174	A	C	0.0017864683071676369
6278	T	G	0.001271190357459663
6282	C	A	0.004045560221854576
6286	T	C	0.0033034845025853043
6302	G	T	0.00410310563698534
6346	A	C	0.0014996682255935601
6418	T	G	0.002227310501895547
6419	C	A	0.00407084803704687
6462	G	T	0.0035440367593641935
6473	T	G	0.0028171984154977347
6522	G	T	0.004700318133009429
6537	T	G	0.0018035175133448275
6557	A	G	0.001961857379993409
6558	T	C	0.1655673875481599
6568	G	T	0.004783533665488444
6582	C	A	0.005279746163651517
6634	T	A	0.009181524888759847
6658	G	T	0.005500523697198431
6690	A	C	0.0018635292608815753
6692	G	T	0.004376513680742496
6711	T	G	0.002084377499510859
6723	C	A	0.0037226268801364556
6725	C	A	0.0035943098433586303
6729	G	T	0.0047518841426858515
6774	G	T	0.004920364579492717
6807	T	G	0.0021079122420827098
6883	A	C	0.0021119123397284082
6887	G	T	0.0043384307015276145
6893	C	T	0.009222039226385167
6936	G	T	0.005377673387282174
6944	A	C	0.001413762517031607
6953	A	C	0.0019753317846766026
6971	A	C	0.001781715049767115
7105	C	A	0.0030265098684210527
7159	A	C	0.0016990035751163072
7172	T	C	0.928893697444451
7207	A	C	0.0016313606148183077
7285	C	A	0.004686171341363372
7286	C	A	0.0045880218521856565
7294	G	T	0.004213353621768296
7308	C	A	0.004329491827537499
7325	A	C	0.0022590814222898247
7377	C	A	0.004030066600461511
7390	A	C	0.0024424983681276494
7420	G	T	0.004675576466904138
7666	T	G	0.004292077761761639
7740	A	C	0.0030233490612921036
7760	A	C	0.002091829455306796
7822	C	T	0.025467171270718233
7867	G	T	0.003990205721928849
7901	G	T	0.004658284941005335
7956	T	G	0.0035481384081399773
7978	A	C	0.0018632696625393466
8098	G	T	0.005240354085705283
8119	A	C	0.0018281966040255653
8125	T	G	0.002439714161123446
8136	A	C	0.0018529639299522402
8153	A	C	0.0016783351956586449
8157	T	G	0.0028069726261561877
8174	G	T	0.005077393091777822
8214	T	G	0.0027715747599871415
8298	T	G	0.00217232353902658
8376	G	T	0.0034877001935147605
8417	G	T	0.003432658807106599
8455	G	T	0.004582184534731325
8472	G	A	0.017865213637327678
8477	G	T	0.0040033016873607125
8480	G	T	0.003718904423109169
8626	T	G	0.002281533548901718
8642	G	T	0.004366187315986312
8680	A	C	0.0022282667701327813
8702	G	T	0.004446951711672252
8724	A	C	0.0017187055841781618
8727	G	T	0.005173431395301987
8742	A	C	0.0017729578983232339
8779	T	C	0.0022643615195125115
8851	T	C	0.001420186594739858
8867	A	C	0.0018162210025257435
8874	A	C	0.0015440202888450068
8875	G	A	0.01260671079983681
8896	A	C	0.0011825526197074177
8912	G	T	0.004883061219565165
9081	G	T	0.0040639697339642945
9149	G	T	0.004802676253675515
9153	A	C	0.0026342846100630636
9186	G	T	0.003883786275598142
9209	A	C	0.0031611146052921675
9262	T	G	0.002206064505497281
9395	A	C	0.002268253800450727
9415	A	C	0.0027229511356308654
9429	G	T	0.003630641451260173
9439	T	G	0.002677437738440409
9468	G	T	0.005711874706442426
9516	T	G	0.002634136298558748
9529	C	A	0.004086898059531127
9539	A	C	0.0014901340599039708
9585	G	T	0.00464358873200056
9589	G	T	0.004881474185215383
9592	A	C	0.001603663485477178
9620	A	C	0.0015101213848683495
9654	G	T	0.00382040281161448
9666	G	T	0.004954184516810018
9671	G	T	0.0043939794990050275
9679	A	C	0.002061862590204799
9771	G	T	0.005258677015308852
9807	A	C	0.002340914178403756
9838	G	T	0.005301321872668491
9943	A	G	0.02615260290656405
10004	G	T	0.004503949942682462
10019	G	T	0.004425485356998412
10028	G	T	0.005870428396460394
10077	A	C	0.0020146078921172736
10153	G	T	0.003271683209984037
10167	A	C	0.0022684949411578234
10341	A	C	0.002093133353877843
10361	A	C	0.0016429936232995755
10444	G	T	0.005522530484140233
10446	C	A	0.003941363486930509
10466	A	C	0.0019555810201584336
10531	C	A	0.0052346614838176585
10538	G	T	0.005759799342861082
10558	C	A	0.004763168478749214
10572	C	A	0.004043581311615945
10606	A	C	0.002585049358134277
10642	T	G	0.002160184954243675
10655	G	T	0.0053016742996795495
10711	T	C	0.003080578244760827
10724	A	C	0.0060895978211529716

Control, replicate E



In [14]:

    
tables_E = [DD6_E, DD9_E]
increasing_E = get_increasing_variants(tables_E, clone)
print("There are "+str(len(increasing_E))+" positions that rise in frequency.")
print("There are too many of them, we choose not to print them.")









    



/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:15: RuntimeWarning: invalid value encountered in double_scalars
  from ipykernel import kernelapp as app
/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:19: RuntimeWarning: invalid value encountered in double_scalars
/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:21: RuntimeWarning: invalid value encountered in double_scalars






    



There are 6381 positions that rise in frequency.
There are too many of them, we choose not to print them.

TLR3 treatment



In [15]:

    
tables_TA = [TD9_A, TD12_A, TD24_A, TD51_A]
increasing_TA = get_increasing_variants(tables_TA, clone)
print("There are "+str(len(increasing_TA))+" positions that rise in frequency.")
print("Those are:")
print_variants(increasing_TA)









    



/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:15: RuntimeWarning: invalid value encountered in double_scalars
  from ipykernel import kernelapp as app
/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:19: RuntimeWarning: invalid value encountered in double_scalars
/home/boussau/anaconda3/lib/python3.6/site-packages/ipykernel_launcher.py:21: RuntimeWarning: invalid value encountered in double_scalars






    



There are 241 positions that rise in frequency.
Those are:
Position	clone base	increasing variant	Final frequency
84	T	G	0.0027281173987600575
137	T	G	0.002666240806892203
146	G	T	0.005028061570557455
158	A	C	0.0017398343722252853
168	T	G	0.002136607688947138
226	T	G	0.003627324416051108
233	C	A	0.00577179962442612
269	A	C	0.0023387078857982193
285	A	C	0.0008862968959656499
305	A	C	0.0023289246747486697
322	T	G	0.9927920378671307
415	A	G	0.0017496143646926541
461	A	T	0.0019677944312796207
486	G	T	0.00390509248634693
487	A	C	0.001792997585087918
520	A	C	0.0013100014923567136
580	A	C	0.0020633351206112817
628	A	C	0.0016085417864187095
629	T	G	0.002002077761463745
647	G	T	0.006202388375634518
649	G	T	0.006710822568105718
719	C	A	0.00516901130372524
752	G	T	0.0038576888604680906
832	G	T	0.004925429231517509
856	G	T	0.004877761081021332
877	C	A	0.003958647604943897
916	C	A	0.004438554339703213
917	T	C	0.0014235287145781706
919	A	C	0.0024663131570563874
941	T	G	0.0025523109200046175
958	G	T	0.004672449352374627
974	A	C	0.0017093287619412356
1003	G	T	0.005394382186666667
1028	G	T	0.00579541230318293
1085	G	T	0.004587398201629772
1088	A	C	0.0014721976679734604
1121	A	C	0.0019014561259752753
1137	C	A	0.004122392159490702
1149	C	A	0.005092141592358709
1181	G	T	0.0053406914412149255
1205	C	A	0.005249529888318624
1319	A	C	0.0016255588994589646
1321	A	C	0.0014414416422677667
1328	A	C	0.0013022678071043271
1339	C	A	0.003695924450002867
1344	A	C	0.0011353150657843142
1392	T	C	0.000917113738083799
1403	G	T	0.0046971111220528204
1445	G	T	0.005761638602362697
1524	T	G	0.0022439262720114934
1525	T	G	0.001933017312773565
1546	T	G	0.0015646858217220506
1618	C	A	0.003637165291878172
1620	A	C	0.001361960783741754
1622	G	T	0.005776680231292941
1633	C	T	0.004616060398766826
1745	G	T	0.004205646961142478
1747	T	G	0.002595925711369146
1750	A	G	0.002042998771868264
1756	T	C	0.0023478402820734873
1874	T	C	0.37029980964175957
1888	A	G	0.00199536424335808
1903	T	C	0.9919414710300696
2117	C	A	0.005664858277486498
2193	C	A	0.005020830767551042
2243	G	T	0.003315185789977404
2265	A	C	0.0025116592338835257
2309	A	C	0.0021977923240938165
2356	G	T	0.0037914620076501607
2373	T	C	0.0016614554084582248
2409	A	C	0.0013285445139199048
2425	T	C	0.001723658384643245
2428	T	C	0.0014337534877599367
2435	A	C	0.0017672829276713278
2442	T	G	0.0020533678869348064
2472	C	A	0.0048760429952445835
2492	G	T	0.004597391209122713
2495	G	T	0.005743461138180612
2503	G	T	0.005602001258005405
2598	A	C	0.0028320639509417443
2609	A	C	0.002755631917312981
2720	A	C	0.0020589584574264986
2733	A	C	0.0013249545992893804
2864	G	T	0.00412824949385394
2873	A	C	0.002328226524810475
2950	T	G	0.0031852326717786796
3007	A	C	0.0015461269431164455
3145	G	T	0.005581068243025531
3258	G	T	0.005309083816004283
3300	A	C	0.0017235332425686407
3417	A	C	0.00207655468583737
3482	G	T	0.006501260232488357
3547	A	C	0.0018640931709798855
3553	T	C	0.0022610478894411995
3608	G	T	0.004876659993007985
3641	A	C	0.0012615958617472665
3644	A	G	0.001191142836192115
3658	G	T	0.004876231352718078
3696	T	G	0.0015796776382333106
3709	T	C	0.9694652640008212
3746	A	C	0.001822596763028645
3797	G	T	0.0051449708557869305
3805	A	C	0.0022277640244601706
3831	G	T	0.004958902276551181
3837	A	C	0.00159498991059567
3890	A	C	0.001869172010835328
3904	C	A	0.003189475941329919
4084	T	C	0.0020774912555364435
4288	C	A	0.004968917341242149
4302	A	C	0.0018772266707195275
4347	A	C	0.0016992661486380483
4360	A	C	0.0021502643428764653
4363	G	T	0.004424375326915585
4474	C	A	0.0030664358382516215
4502	A	C	0.002288549023448431
4564	A	C	0.002067866054477121
4606	A	G	0.971118203777336
4797	A	C	0.0017993505452212955
4824	A	C	0.0019661647768986048
4836	A	C	0.0018394432160434879
4842	T	G	0.00229063753653147
4849	C	A	0.003707760094433237
4897	G	T	0.005401234351825866
4900	G	T	0.004388112333881942
4925	A	C	0.0018407995181167769
4974	G	T	0.005618968729470503
5032	A	C	0.0011634982473302355
5056	A	C	0.0024688653599734237
5108	A	C	0.0017908561173748917
5186	T	C	0.002478945974270967
5252	A	C	0.0015304791057250315
5278	A	G	0.0008396163083164302
5307	T	G	0.001664577159337778
5332	A	G	0.006619761838693569
5449	T	C	0.0025496420222092344
5494	T	C	0.0018130616264294792
5691	T	G	0.003079049645833223
5747	G	T	0.004945552194250888
5821	G	T	0.006429880670148464
5848	A	G	0.0017728766548803546
5887	T	C	0.0026264455141919835
5934	T	C	0.1637754063464081
5935	T	C	0.010919942485783917
6032	G	T	0.005133693837369562
6037	T	G	0.003756577521963036
6087	T	G	0.0015731624804987106
6317	A	C	0.0017464152212773275
6505	A	C	0.0017961393784234343
6551	G	T	0.004661980996926367
6553	G	T	0.006329484911829342
6562	G	T	0.004932992496129753
6588	A	G	0.0031455332382801637
6593	G	T	0.004699926483433297
6682	G	T	0.006254361085556576
6691	G	T	0.005309044116161359
6701	A	G	0.002443390714428658
6708	G	T	0.005125660430558947
6737	G	T	0.005026724450294209
6760	A	C	0.0016210124401913877
6793	T	G	0.0022566472177360155
6812	G	T	0.005697975667455221
6813	T	G	0.002710237857578594
6880	G	T	0.006829484956406007
6882	T	G	0.00203647408492994
6934	G	T	0.004490593372820743
6961	A	C	0.0015236904895044043
6962	A	C	0.0017777266928298235
7057	T	G	0.0012609058908189578
7132	A	C	0.0015060056571934823
7177	A	C	0.001930370332103043
7184	T	G	0.002154764819338353
7234	G	T	0.00522295326197287
7345	G	T	0.004300912246446122
7396	A	C	0.0016491583907437917
7438	C	A	0.004589942667819072
7474	G	T	0.008005567285833102
7533	G	T	0.00601545784824912
7557	G	T	0.004636042809767863
7607	T	C	0.0013270127128473994
7725	C	A	0.004675789511946881
7745	T	G	0.0022876801876707442
7781	G	T	0.00348564163862145
7872	G	T	0.0037950894582909457
7891	A	C	0.0024558841077261515
7976	A	C	0.0023751362064794875
8160	T	G	0.0019510445447986533
8218	G	T	0.005206527685397881
8243	G	T	0.004631126389023093
8279	G	T	0.0056075422536913415
8280	G	T	0.006642674265816748
8311	T	C	0.008673610483401283
8320	G	T	0.005582040634586329
8325	A	C	0.0016274566431645453
8353	C	A	0.006680269864141622
8354	A	C	0.0023682601488498028
8460	C	A	0.00544569684066173
8519	A	C	0.9743558922477839
8529	A	G	0.0013415847641199104
8663	T	G	0.0026572304258485075
8673	T	G	0.0026403624691865237
8702	G	T	0.004673721284796572
8851	T	C	0.0017467808605471213
8886	A	C	0.0016302828209869412
8887	T	C	0.0028452076788830715
9033	T	G	0.004466114944968316
9087	C	A	0.0039046158830953743
9135	T	G	0.0020141793486957915
9152	G	T	0.006469033668268406
9175	C	T	0.004267592098487757
9193	A	C	0.0012397019765287213
9209	A	C	0.0021935137463685553
9226	A	C	0.0013512586958872507
9302	T	G	0.0022057069866851515
9316	T	C	0.0012436597413238138
9375	T	C	0.0011133623955589696
9553	A	C	0.0014278422605813156
9587	A	C	0.0015721144908705216
9602	A	C	0.0016456402424530542
9620	A	C	0.0014782098439519824
9622	C	A	0.0036330519291569914
9625	A	G	0.0013405137640467686
9633	G	T	0.004109216675879865
9654	G	T	0.005124034177748112
9727	A	G	0.0022094515522800067
9740	G	T	0.0056004977318160854
9930	G	T	0.005006332412041586
9964	C	A	0.005731033207102116
9990	G	T	0.00539667251316993
10159	A	C	0.002311821562550158
10177	C	A	0.0038549765308846145
10292	A	C	0.001151357438016529
10360	T	C	0.0028100633363068693
10389	T	C	0.004393501906108891
10394	T	C	0.0010855659320477505
10445	A	C	0.0030354844933128317
10455	G	T	0.007646953444164882
10525	G	T	0.005753733536330095
10534	A	C	0.0016767122057422777
10629	T	G	0.0014651987687953189
10644	G	T	0.005695280323687222
10757	G	T	0.007749460249415432



In [ ]: