In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline
In [2]:
#from __future__ import division
import pandas as pd
import numpy as np
from ggplot import *
import os
import sys
utils_path = os.path.abspath(os.path.join('..'))
if utils_path not in sys.path:
sys.path.append(utils_path)
from utils.rpos import *
In [3]:
# Sample name -> the barcodes sequenced for that sample
# (barcodes listed under one sample are averaged together downstream):
#   s9      WT
#   s9+bcm  WT +BCM
#   s17     triple sRNA mutant
samples = {
    's9':     ['ATCACG', 'ACAGTG'],
    's9+bcm': ['CGATGT', 'GCCAAT'],
    's17':    ['TTAGGC', 'GATCAG'],
}

# Flat list of every barcode to load, in the order the count columns are built
barcodes = [
    'ATCACG', 'ACAGTG',
    'CGATGT', 'GCCAAT',
    'TTAGGC', 'GATCAG',
]
In [4]:
def get_counts_df(barcodes, res_dir='../results'):
    '''
    Aggregates htseq-count results into a raw counts dataframe.

    Only files located directly inside `res_dir` whose name ends with
    '.counts' are considered. For every barcode in `barcodes` the first such
    file whose name contains the barcode is read as a header-less,
    tab-separated two-column table (gene, counts).

    Counts are matched to genes by gene name, not by row position, so the
    per-barcode files do not need to list the genes in the same order. The
    rows (genes and their order) are taken from the first barcode's file; a
    gene missing from a later file gets NaN in that barcode's column, and if
    a file lists a gene more than once its first count is used.

    Parameters
    ----------
    barcodes : iterable of str
        Barcodes to look for in the file names; each becomes a column.
    res_dir : str
        Directory holding the '.counts' files.

    Returns
    -------
    pandas.DataFrame
        A 'gene' column followed by one counts column per barcode. Rows whose
        gene name starts with '__' are dropped. When `barcodes` is empty an
        empty frame with only the 'gene' column is returned.

    Raises
    ------
    IndexError
        If no '.counts' file in `res_dir` contains one of the barcodes.
    '''
    d, _, filenames = next(os.walk(res_dir))
    infiles = [f for f in filenames if f.endswith('.counts')]
    df = pd.DataFrame()
    for barcode in barcodes:
        matches = [f for f in infiles if barcode in f]
        if not matches:
            # Same exception type as the former bare `[...][0]`, but it now
            # says which barcode/directory is the problem.
            raise IndexError(
                "no '.counts' file containing barcode {0!r} found in {1!r}".format(
                    barcode, res_dir))
        cntfile = os.path.join(d, matches[0])
        df_ = pd.read_csv(cntfile, sep='\t', header=None, names=['gene', 'counts'])
        if df.empty:
            df['gene'] = df_['gene']
        # Look the counts up by gene name so that a differently ordered file
        # cannot silently attach counts to the wrong gene.
        counts_by_gene = df_.drop_duplicates(subset='gene').set_index('gene')['counts']
        df[barcode] = df['gene'].map(counts_by_gene)
    if 'gene' not in df.columns:
        # No barcode was requested: nothing was read.
        return pd.DataFrame(columns=['gene'])
    return df[~df['gene'].str.startswith('__')]
def get_utr_counts_df(df, barcodes, res_dir='../results',
                      ref_name='gi|556503834|ref|NC_000913.3|'):
    '''
    Adds per-barcode 5'UTR read counts to `df` in place.

    Only files located directly inside `res_dir` whose name ends with
    '_sorted.bam' are considered. For every barcode in `barcodes` the first
    such file whose name contains the barcode is passed to `get_counts`, and
    the result is stored in a new (or overwritten) `utr_<barcode>` column.

    Parameters
    ----------
    df : pandas.DataFrame
        Table to annotate; modified in place. `get_counts` is told to use its
        'start' and 'end' columns (key5='start', key3='end').
    barcodes : iterable of str
        Barcodes to look for in the BAM file names.
    res_dir : str
        Directory holding the '_sorted.bam' files.
    ref_name : str
        Forwarded unchanged as the third positional argument of `get_counts`
        (presumably the reference sequence name used in the BAM files;
        confirm against the `get_counts` helper).

    Returns
    -------
    None

    Raises
    ------
    IndexError
        If no '_sorted.bam' file in `res_dir` contains one of the barcodes.
    '''
    d, _, filenames = next(os.walk(res_dir))
    infiles = [f for f in filenames if f.endswith('_sorted.bam')]
    for barcode in barcodes:
        matches = [f for f in infiles if barcode in f]
        if not matches:
            # Same exception type as the former bare `[...][0]`, but it now
            # says which barcode/directory is the problem.
            raise IndexError(
                "no '_sorted.bam' file containing barcode {0!r} found in {1!r}".format(
                    barcode, res_dir))
        bamfile = os.path.join(d, matches[0])
        # An earlier variant called get_coverage(..., key5='coord_5', key3='coord_3')
        # here; read counts over start/end are used instead.
        df['utr_{0}'.format(barcode)] = get_counts(df, bamfile, ref_name,
                                                   key5='start', key3='end')
In [6]:
# ORF-level htseq-count table: a 'gene' column plus one count column per barcode
count_df = get_counts_df(barcodes, res_dir='../results')

# 5' UTR annotations: every BED line contributes its 2nd and 3rd fields as
# integer start/end and its 4th field as the gene name
with open('../ref/utrs_corr.bed', 'r') as fi:
    res = [
        {'gene': fields[3], 'start': int(fields[1]), 'end': int(fields[2])}
        for fields in (line.strip().split() for line in fi)
    ]
df_utr5 = pd.DataFrame.from_records(res)
df_utr5['UTR_length'] = (df_utr5['end'] - df_utr5['start']).abs()
# Keep only the UTRs whose length exceeds 80
df_utr5 = df_utr5[df_utr5['UTR_length'] > 80]

# Attach the ORF counts to each UTR record through the shared 'gene' column
dfm = df_utr5.copy().merge(count_df, on='gene')

# Add one utr_<barcode> column per barcode (dfm is modified in place)
get_utr_counts_df(dfm, barcodes, res_dir='../results')
In [7]:
# Show the merged table: UTR coordinates/length, per-barcode ORF counts and
# the utr_<barcode> columns added by get_utr_counts_df
dfm
Out[7]:
end
gene
start
UTR_length
ATCACG
ACAGTG
CGATGT
GCCAAT
TTAGGC
GATCAG
...
utr_CGATGT
utr_GCCAAT
utr_TTAGGC
utr_GATCAG
utr_TGACCA
utr_TAGCTT
utr_CAGATC
utr_GGCTAC
utr_ACTTGA
utr_CTTGTA
0
5234
yaaX
5030
204
258
281
512
481
178
328
...
351.0
370.0
298.0
448.0
439.0
446.0
137.0
235.0
479.0
450.0
1
6587
yaaA
6459
128
885
819
688
768
677
1142
...
163.0
225.0
175.0
315.0
181.0
281.0
206.0
114.0
388.0
305.0
2
6615
yaaA
6459
156
885
819
688
768
677
1142
...
172.0
236.0
175.0
322.0
191.0
303.0
207.0
116.0
411.0
329.0
3
11542
yaaW
11356
186
36
63
1512
1942
36
74
...
504.0
776.0
12.0
36.0
567.0
1056.0
12.0
24.0
1505.0
1546.0
4
11913
yaaI
11786
127
24
40
581
926
31
51
...
304.0
356.0
14.0
23.0
389.0
399.0
8.0
13.0
458.0
440.0
5
11938
yaaI
11786
152
24
40
581
926
31
51
...
340.0
419.0
14.0
24.0
438.0
510.0
8.0
15.0
565.0
528.0
6
12163
dnaK
12048
115
36348
53503
62444
70306
39515
48358
...
2977.0
6584.0
4799.0
2830.0
6350.0
6968.0
2336.0
6099.0
15976.0
14174.0
7
17489
nhaA
17317
172
1646
1086
2994
3830
826
1366
...
792.0
1358.0
133.0
215.0
1301.0
1908.0
74.0
137.0
2949.0
2223.0
8
21210
rpsT
21078
132
67237
50862
25383
18595
27532
62067
...
9302.0
8966.0
14726.0
24465.0
10912.0
9380.0
23735.0
17554.0
7045.0
5706.0
9
22391
ileS
21833
558
20377
16745
9326
10862
15253
19726
...
1739.0
2728.0
5454.0
5556.0
2509.0
3155.0
4785.0
5718.0
6382.0
5578.0
10
22391
ileS
22034
357
20377
16745
9326
10862
15253
19726
...
1369.0
2398.0
4524.0
4748.0
2043.0
2715.0
4188.0
5051.0
5825.0
5171.0
11
22391
ileS
22229
162
20377
16745
9326
10862
15253
19726
...
1043.0
2079.0
3899.0
4030.0
1715.0
2298.0
3665.0
4388.0
5149.0
4749.0
12
25207
lspA
25014
193
2524
3886
1573
1475
1645
3350
...
1189.0
1295.0
1949.0
2738.0
1182.0
1692.0
2208.0
3284.0
2990.0
2404.0
13
28374
dapB
28288
86
1753
1534
570
1171
2320
2508
...
75.0
103.0
122.0
257.0
142.0
138.0
133.0
131.0
194.0
221.0
14
29651
carA
29551
100
706
529
938
1119
348
616
...
72.0
116.0
61.0
63.0
124.0
141.0
130.0
61.0
206.0
212.0
15
34300
caiF
34218
82
67
111
279
649
68
95
...
39.0
119.0
10.0
17.0
102.0
174.0
15.0
14.0
441.0
338.0
16
35499
caiE
35371
128
60
97
288
511
33
107
...
204.0
260.0
5.0
19.0
202.0
426.0
17.0
19.0
908.0
833.0
17
42037
caiT
41931
106
17
34
452
822
21
46
...
22.0
63.0
1.0
3.0
33.0
88.0
1.0
2.0
105.0
144.0
18
45807
yaaU
45592
215
40
101
621
1166
23
130
...
320.0
371.0
5.0
29.0
298.0
863.0
2.0
19.0
1138.0
1221.0
19
47246
kefF
47080
166
142
178
381
641
168
218
...
114.0
188.0
36.0
101.0
102.0
318.0
40.0
47.0
481.0
420.0
20
52034
apaG
51606
428
1989
1683
1450
1578
983
2255
...
3292.0
3141.0
2270.0
3686.0
3032.0
3883.0
3699.0
3152.0
4924.0
4449.0
21
52588
rsmA
52430
158
4636
4017
3939
3937
2355
4938
...
1129.0
1430.0
905.0
2211.0
982.0
1794.0
1888.0
1506.0
2132.0
1719.0
22
57241
lptD
57109
132
14342
11620
5975
6982
10598
17089
...
996.0
1071.0
2016.0
2770.0
1035.0
1578.0
2130.0
1373.0
1517.0
1384.0
23
57336
lptD
57109
227
14342
11620
5975
6982
10598
17089
...
1047.0
1152.0
2130.0
2882.0
1135.0
1653.0
2279.0
1468.0
1641.0
1495.0
24
57364
djlA
57268
96
921
750
1022
1102
664
930
...
103.0
146.0
134.0
139.0
159.0
183.0
205.0
134.0
231.0
247.0
25
60450
rluA
60346
104
2420
1440
2574
2018
1580
1914
...
884.0
747.0
544.0
623.0
882.0
816.0
1444.0
709.0
1012.0
1013.0
26
70387
araC
70241
146
208
169
659
837
838
707
...
177.0
295.0
252.0
187.0
202.0
353.0
53.0
114.0
614.0
753.0
27
73085
thiQ
72927
158
227
242
567
677
138
304
...
349.0
463.0
114.0
234.0
357.0
547.0
173.0
198.0
698.0
728.0
28
79594
leuD
79453
141
120
204
217
308
72
183
...
71.0
88.0
31.0
67.0
49.0
125.0
62.0
93.0
221.0
181.0
29
84024
leuL
83708
316
128
330
93
150
332
500
...
94.0
222.0
256.0
294.0
180.0
268.0
79.0
155.0
420.0
334.0
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
1657
4591367
yjiY
4591279
88
3923
70646
17206
48114
34677
71551
...
966.0
3054.0
2274.0
4119.0
1932.0
4979.0
175.0
7342.0
2886.0
6604.0
1658
4591657
tsr
4591412
245
142
556
16167
16399
215
1101
...
1993.0
3184.0
46.0
184.0
2796.0
5219.0
25.0
72.0
5830.0
6055.0
1659
4594837
yjjL
4594722
115
178
603
15365
17051
238
861
...
1382.0
2343.0
32.0
113.0
2653.0
2898.0
12.0
70.0
9899.0
10182.0
1660
4594844
yjjL
4594722
122
178
603
15365
17051
238
861
...
1391.0
2363.0
32.0
113.0
2684.0
2942.0
12.0
70.0
9941.0
10220.0
1661
4594882
yjjL
4594722
160
178
603
15365
17051
238
861
...
1680.0
2946.0
44.0
126.0
3153.0
3743.0
15.0
81.0
11082.0
11199.0
1662
4595947
yjjM
4595851
96
70
297
10638
15333
407
515
...
794.0
1448.0
104.0
56.0
1681.0
1546.0
12.0
36.0
2086.0
2586.0
1663
4600337
yjjA
4600189
148
935
1130
8250
7085
673
1179
...
3669.0
3562.0
236.0
299.0
4702.0
4019.0
285.0
275.0
2957.0
4121.0
1664
4601615
dnaT
4601517
98
804
847
9333
9941
364
929
...
5136.0
5578.0
190.0
443.0
4980.0
7865.0
291.0
317.0
4881.0
5141.0
1665
4601615
dnaT
4601517
98
804
847
9333
9941
364
929
...
5136.0
5578.0
190.0
443.0
4980.0
7865.0
291.0
317.0
4881.0
5141.0
1666
4602177
yjjB
4601950
227
32
67
5565
5650
74
84
...
6396.0
5060.0
55.0
76.0
6203.0
4801.0
14.0
65.0
3217.0
4483.0
1667
4602177
yjjB
4601950
227
32
67
5565
5650
74
84
...
6396.0
5060.0
55.0
76.0
6203.0
4801.0
14.0
65.0
3217.0
4483.0
1668
4603034
yjjP
4602858
176
119
331
24619
29113
234
470
...
5120.0
9129.0
45.0
145.0
8368.0
12953.0
37.0
104.0
7387.0
8571.0
1669
4603477
yjjQ
4603319
158
29
143
10300
12296
75
205
...
2605.0
3059.0
19.0
35.0
3157.0
4915.0
4.0
29.0
3232.0
3323.0
1670
4605752
fhuF
4605663
89
1271
3268
36712
34316
1104
3735
...
11448.0
12770.0
387.0
1392.0
13155.0
20631.0
1013.0
1043.0
15701.0
14489.0
1671
4606273
leuV
4606165
108
502
579
1946
999
321
448
...
45748.0
20246.0
29439.0
30293.0
20196.0
8576.0
21569.0
35549.0
12062.0
16187.0
1672
4607868
rsmC
4607700
168
3543
2962
1325
1080
1297
3245
...
842.0
614.0
816.0
1722.0
608.0
849.0
1609.0
1014.0
988.0
639.0
1673
4611396
osmY
4611153
243
314
294
3848
4794
275
342
...
1584.0
1783.0
104.0
214.0
1769.0
2404.0
100.0
115.0
2929.0
2310.0
1674
4617323
deoC
4616679
644
1894
3545
1746
2914
9587
8747
...
566.0
1042.0
1599.0
1242.0
1030.0
1442.0
354.0
492.0
2522.0
2506.0
1675
4621769
yjjJ
4621657
112
786
688
1531
1887
1376
1297
...
692.0
670.0
1131.0
1953.0
538.0
671.0
1041.0
1377.0
965.0
1043.0
1676
4624238
lplA
4624117
121
401
389
1026
1127
431
533
...
307.0
357.0
130.0
150.0
370.0
472.0
142.0
209.0
682.0
733.0
1677
4633899
yjjX
4633745
154
943
984
1712
1743
784
1176
...
697.0
919.0
972.0
1057.0
873.0
1094.0
668.0
604.0
1657.0
1614.0
1678
4635521
creA
4635243
278
430
493
567
1009
568
636
...
569.0
1026.0
873.0
1255.0
877.0
1188.0
763.0
1311.0
1851.0
1975.0
1679
4640508
arcA
4640306
202
11171
11170
4073
5514
7081
13961
...
1236.0
2274.0
4149.0
5441.0
1986.0
2406.0
4439.0
4040.0
9183.0
5447.0
1680
4640512
arcA
4640306
206
11171
11170
4073
5514
7081
13961
...
1241.0
2282.0
4156.0
5452.0
1994.0
2416.0
4447.0
4051.0
9213.0
5469.0
1681
4640535
arcA
4640306
229
11171
11170
4073
5514
7081
13961
...
1269.0
2332.0
4276.0
5510.0
2024.0
2476.0
4499.0
4133.0
9558.0
5665.0
1682
4640599
arcA
4640306
293
11171
11170
4073
5514
7081
13961
...
1352.0
2549.0
4610.0
5906.0
2182.0
2653.0
4713.0
4453.0
11234.0
6725.0
1683
4640681
arcA
4640306
375
11171
11170
4073
5514
7081
13961
...
1367.0
2584.0
4689.0
6009.0
2245.0
2710.0
4779.0
4502.0
11298.0
6789.0
1684
4640688
arcA
4640306
382
11171
11170
4073
5514
7081
13961
...
1367.0
2587.0
4694.0
6018.0
2252.0
2713.0
4786.0
4505.0
11303.0
6792.0
1685
4640801
arcA
4640306
495
11171
11170
4073
5514
7081
13961
...
1384.0
2640.0
4770.0
6104.0
2287.0
2790.0
4864.0
4582.0
11386.0
6897.0
1686
4640942
yjtD
4640838
104
216
187
165
290
187
247
...
53.0
147.0
171.0
123.0
102.0
120.0
166.0
173.0
195.0
292.0
1687 rows × 28 columns
In [8]:
# Persist the raw merged counts; note the file is tab-separated despite the .csv extension
dfm.to_csv('../results/dfm_raw.csv', sep='\t')
In [42]:
#id_vars = ['TSS','TU_name','coord_5','coord_3','gene', 'UTR_length']
# Annotation columns copied unchanged into the per-sample table
id_vars = ['gene', 'UTR_length']
value_vars = ['s9','s17','s19','s9+bcm','s17+bcm','s19+bcm']
# Work on a copy so the raw merged counts in `dfm` are left untouched
dfn = dfm.copy()


def pseudo_counts(x):
    '''Turn a zero count into one; every other value is returned as is.'''
    if x == 0:
        return x + 1
    return x


# Normalize counts by gene and utr length
def norm_orf(barcode, rec):
    '''
    ORF value of row `rec` for `barcode`.
    Currently the raw count: the division by ORF length is disabled.
    '''
    #return float(rec[barcode] / abs(rec['first_gene_5'] - rec['first_gene_3']))
    return rec[barcode]


def norm_utr(barcode, rec):
    '''UTR count of row `rec` for `barcode` divided by the row's UTR length.'''
    utr_count = rec['utr_{0}'.format(barcode)]
    return float(utr_count / rec['UTR_length'])


# NOTE(review): the pseudo-count is applied to the ORF counts only. A UTR with
# zero counts in every replicate of a sample would reach log10(0) below -
# confirm whether the utr_<barcode> counts should get the same treatment.
for barcode in barcodes:
    dfn[barcode] = dfn[barcode].map(pseudo_counts)
    for template, normalize in (('orf_{0}', norm_orf), ('utr_{0}', norm_utr)):
        dfn[template.format(barcode)] = dfn.apply(
            lambda rec: normalize(barcode, rec), axis=1)

df = dfn[id_vars].copy()
# Take means across replicates according to the samples dict, then log10
for sample, bcs in samples.items():
    for template in ('orf_{0}', 'utr_{0}'):
        replicate_cols = [template.format(b) for b in bcs]
        df[template.format(sample)] = np.log10(dfn[replicate_cols].mean(axis=1))
df
Out[42]:
gene
UTR_length
orf_s17+bcm
utr_s17+bcm
orf_s9+bcm
utr_s9+bcm
orf_s19
utr_s19
orf_s17
utr_s17
orf_s9
utr_s9
orf_s19+bcm
utr_s19+bcm
0
yaaX
204
2.708846
0.336283
2.695919
0.247275
2.347330
-0.040117
2.403121
0.262079
2.430559
0.278642
2.886491
0.357356
1
yaaA
128
2.938269
0.256402
2.862131
0.180592
2.883945
0.096910
2.958803
0.281956
2.930440
0.236199
3.122380
0.432493
2
yaaA
156
2.938269
0.199572
2.862131
0.116506
2.883945
0.015048
2.958803
0.202202
2.930440
0.161944
3.122380
0.375077
3
yaaW
186
3.303952
0.639776
3.237292
0.536667
1.778151
-1.014240
1.740363
-0.889302
1.694605
-1.052029
3.678427
0.913899
4
yaaI
127
2.975891
0.491693
2.877083
0.414710
1.380211
-1.082614
1.612784
-0.836632
1.505150
-0.913472
3.217747
0.548443
5
yaaI
152
2.975891
0.493935
2.877083
0.397368
1.380211
-1.121146
1.612784
-0.903090
1.505150
-0.926571
3.217747
0.555747
6
dnaK
115
4.921624
1.762711
4.822005
1.618775
4.925830
1.564357
4.642825
1.520740
4.652493
1.332965
5.249165
2.117559
7
nhaA
172
3.623818
0.969811
3.533009
0.795880
3.015360
-0.212276
3.039811
0.005021
3.135451
-0.024675
4.024998
1.177100
8
rpsT
132
4.374583
1.885721
4.342205
1.840087
4.684401
2.194230
4.651273
2.171582
4.771216
2.320020
4.075273
1.683940
9
ileS
558
4.047664
0.705459
4.004063
0.602352
4.399579
0.973649
4.242777
0.994123
4.268601
0.927446
4.382755
1.030067
10
ileS
357
4.047664
0.823726
4.004063
0.722297
4.399579
1.111927
4.242777
1.113475
4.268601
1.045520
4.382755
1.187537
11
ileS
162
4.047664
1.092924
4.004063
0.983888
4.399579
1.395413
4.242777
1.388673
4.268601
1.318179
4.382755
1.485002
12
lspA
193
3.214446
0.871899
3.182985
0.808564
3.576341
1.153143
3.397505
1.084308
3.505828
1.125894
3.469085
1.145324
13
dapB
86
3.072434
0.211630
2.939769
0.014892
3.327461
0.186075
3.382737
0.343111
3.215770
0.226870
3.284769
0.382520
14
carA
100
3.066512
0.122216
3.012204
-0.026872
2.939769
-0.019997
2.683047
-0.207608
2.790637
-0.055517
3.277036
0.320146
15
caiF
82
2.871281
0.226065
2.666518
-0.016187
1.841985
-0.752446
1.911158
-0.783480
1.949390
-0.474481
3.349860
0.676694
16
caiE
128
2.716421
0.389720
2.601517
0.258278
1.822822
-0.851937
1.845098
-1.028029
1.894870
-1.028029
3.236411
0.832559
17
caiT
106
3.000000
-0.243550
2.804139
-0.396917
1.447158
-1.849215
1.525045
-1.724276
1.406540
-1.849215
3.257198
0.069863
18
yaaU
215
3.132900
0.431364
2.951095
0.206010
1.698970
-1.311249
1.883661
-1.101990
1.848189
-1.235528
3.497068
0.739259
19
kefF
166
2.877659
0.102111
2.708421
-0.041131
2.131939
-0.581619
2.285557
-0.384418
2.204120
-0.464233
3.149065
0.433587
20
apaG
428
3.241422
0.907318
3.180126
0.875940
3.258637
0.903280
3.209247
0.842481
3.263873
0.888712
3.421933
1.039405
21
rsmA
158
3.625878
0.943732
3.595276
0.908383
3.668712
1.031025
3.561876
0.993910
3.636137
1.091823
3.768638
1.085886
22
lptD
132
3.867379
0.995535
3.811474
0.893737
4.130028
1.122836
4.141246
1.258369
4.113308
1.230739
3.991293
1.040944
23
lptD
227
3.867379
0.788237
3.811474
0.685169
4.130028
0.916628
4.141246
1.042955
4.113308
1.019180
3.991293
0.839320
24
djlA
96
3.058995
0.250725
3.026125
0.112898
3.001517
0.246898
2.901458
0.152861
2.921946
0.200999
3.299507
0.396127
25
rluA
104
3.373096
0.911874
3.360972
0.894391
3.430398
1.014981
3.242293
0.749008
3.285557
0.894657
3.414973
0.988362
26
araC
146
3.018284
0.278910
2.873902
0.208559
2.419129
-0.242666
2.887898
0.177082
2.275311
-0.375478
3.206961
0.670386
27
thiQ
158
2.881955
0.456481
2.793790
0.409869
2.356026
0.069687
2.344392
0.041892
2.370143
0.113097
3.105169
0.654432
28
leuD
141
2.470557
-0.209700
2.419129
-0.248852
2.235528
-0.259917
2.105510
-0.459023
2.209515
-0.378367
2.764550
0.153977
29
leuL
316
2.181844
-0.149439
2.084576
-0.301030
2.288920
-0.431501
2.619093
-0.060354
2.359835
-0.275672
2.372912
0.076654
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
1657
yjiY
88
4.658188
1.594028
4.514016
1.358713
4.949766
1.630532
4.725209
1.560192
4.571528
1.195239
4.837756
1.731754
1658
tsr
245
4.325238
1.213707
4.211734
1.023882
2.461649
-0.703424
2.818226
-0.328468
2.542825
-0.579606
4.627064
1.384803
1659
yjjL
115
4.298569
1.382643
4.209729
1.209398
2.605844
-0.447914
2.739968
-0.200360
2.591621
-0.671532
4.866160
1.941058
1660
yjjL
122
4.298569
1.362810
4.209729
1.187104
2.605844
-0.473576
2.739968
-0.226022
2.591621
-0.679820
4.866160
1.917122
1661
yjjL
160
4.298569
1.333447
4.209729
1.160056
2.605844
-0.522879
2.739968
-0.274701
2.591621
-0.666301
4.866160
1.842785
1662
yjjM
96
4.277552
1.225498
4.113459
1.067334
2.343409
-0.602060
2.663701
-0.079181
2.263636
-0.739233
4.450780
1.386202
1663
yjjA
148
3.948780
1.469275
3.884654
1.387907
2.962132
0.276896
2.966611
0.257062
3.013890
0.334888
3.810904
1.378619
1664
dnaT
98
4.072673
1.816478
3.983942
1.737696
2.828338
0.491648
2.810569
0.509148
2.916717
0.630988
3.986144
1.708698
1665
dnaT
98
4.072673
1.816478
3.983942
1.737696
2.828338
0.491648
2.810569
0.509148
2.916717
0.630988
3.986144
1.708698
1666
yjjB
227
3.806858
1.384495
3.748769
1.401977
1.763428
-0.759429
1.897627
-0.539785
1.694605
-0.732777
3.665206
1.229435
1667
yjjB
227
3.806858
1.384495
3.748769
1.401977
1.763428
-0.759429
1.897627
-0.539785
1.694605
-0.732777
3.665206
1.229435
1668
yjjP
176
4.563665
1.782265
4.429203
1.607242
2.382917
-0.397324
2.546543
-0.267789
2.352183
-0.379225
4.377971
1.656436
1669
yjjQ
158
4.195651
1.407294
4.053002
1.253436
1.944483
-0.981173
2.146128
-0.767293
1.934498
-0.943385
4.156110
1.316886
1670
fhuF
89
4.646315
2.278317
4.550400
2.133718
3.503109
1.062603
3.383726
0.999756
3.355930
1.023969
4.594707
2.229443
1671
leuV
108
2.957368
2.124516
3.168055
2.485051
2.804480
2.422319
2.584896
2.441753
2.732796
2.585722
3.103462
2.116549
1672
rsmC
168
3.074816
0.637120
3.080085
0.636822
3.452706
0.892459
3.356217
0.878152
3.512217
1.073544
3.207904
0.685048
1673
osmY
243
3.722181
0.933812
3.635584
0.840607
2.414973
-0.354198
2.489255
-0.184209
2.482874
-0.165498
3.893318
1.032612
1674
deoC
644
3.610820
0.283133
3.367356
0.096370
3.513617
-0.182545
3.962227
0.343555
3.434489
-0.200895
3.819544
0.591479
1675
yjjJ
112
3.315656
0.732178
3.232742
0.783929
2.959041
1.033208
3.125969
1.138866
2.867467
1.163236
3.555215
0.952516
1676
lplA
121
3.049024
0.541497
3.032014
0.438353
2.863620
0.161492
2.683047
0.063343
2.596597
-0.031633
3.368752
0.766941
1677
yjjX
154
3.261739
0.805254
3.237418
0.719891
2.896526
0.615936
2.991226
0.818731
2.983852
0.730510
3.488762
1.026130
1678
creA
278
2.965437
0.569845
2.896526
0.457686
2.802432
0.571734
2.779596
0.582897
2.664172
0.573614
3.302980
0.837670
1679
arcA
202
3.695919
1.036281
3.680653
0.938926
4.071072
1.321963
4.022057
1.375437
4.048073
1.329378
4.256261
1.558863
1680
arcA
206
3.695919
1.029541
3.680653
0.932015
4.071072
1.314420
4.022057
1.367736
4.048073
1.321968
4.256261
1.551888
1681
arcA
229
3.695919
0.992347
3.680653
0.895558
4.071072
1.275246
4.022057
1.329740
4.048073
1.281936
4.256261
1.521635
1682
arcA
293
3.695919
0.916499
3.680653
0.823278
4.071072
1.194282
4.022057
1.253953
4.048073
1.196503
4.256261
1.486385
1683
arcA
375
3.695919
0.819982
3.680653
0.721646
4.071072
1.092534
4.022057
1.154241
4.048073
1.094960
4.256261
1.382305
1684
arcA
382
3.695919
0.812826
3.680653
0.713943
4.071072
1.084969
4.022057
1.146777
4.048073
1.087207
4.256261
1.374465
1685
arcA
495
3.695919
0.709972
3.680653
0.609023
4.071072
0.979613
4.022057
1.040754
4.048073
0.980072
4.256261
1.266412
1686
yjtD
104
2.469085
0.028290
2.356981
-0.017033
2.563481
0.212136
2.336460
0.150284
2.304275
0.032185
2.826399
0.369466
1687 rows × 14 columns
In [43]:
# UTR values of s9 (x) against s9+bcm (y); point size follows UTR_length and
# the abline is the identity line (slope 1, intercept 0)
p = (
    ggplot(df, aes(x='utr_s9', y='utr_s9+bcm', size='UTR_length'))
    + geom_point(alpha=0.1)
    + geom_abline(slope=1, intercept=0, size=2.5, color='#586e75')
)
print(p)
<ggplot: (8783559005255)>
In [44]:
# ORF values of s9 (x) against s9+bcm (y) with the identity line for reference
p = (
    ggplot(df, aes(x='orf_s9', y='orf_s9+bcm'))
    + geom_point(alpha=0.1)
    + geom_abline(slope=1, intercept=0, size=2.5, color='#586e75')
)
print(p)
<ggplot: (-9223363253305905997)>
In [15]:
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import euclidean_distances
from sklearn.neighbors import kneighbors_graph
from sklearn import cluster
from sklearn import mixture
# Feature matrix for clustering: the utr_s9 and utr_s9+bcm columns of `df`,
# standardised by StandardScaler.
# `.values` replaces DataFrame.as_matrix(), which current pandas no longer provides.
X = df[['utr_s9', 'utr_s9+bcm']].values
X = StandardScaler().fit_transform(X)

# Bandwidth for MeanShift, estimated from the data
bandwidth = cluster.estimate_bandwidth(X, quantile=0.3)
# 20-nearest-neighbour graph handed to the agglomerative models as connectivity;
# adding the transpose makes it symmetric (the 0.05 factor only rescales the entries)
connectivity = kneighbors_graph(X, n_neighbors=20)
connectivity = 0.05 * (connectivity + connectivity.T)
#distances = euclidean_distances(X)

# NOTE(review): `mixture.GMM` and the `affinity=` keyword match the scikit-learn
# release this notebook was run with; newer releases use GaussianMixture / `metric=`.
# NOTE(review): no random_state is passed to any estimator, so the labels of the
# randomly initialised models may differ between runs - consider fixing a seed.
gmm = mixture.GMM(n_components=2, covariance_type='full')
ms = cluster.MeanShift(bandwidth=bandwidth, bin_seeding=True)
two_means = cluster.MiniBatchKMeans(n_clusters=2, batch_size=200)
kmeans = cluster.KMeans(n_clusters=2)
ward = cluster.AgglomerativeClustering(n_clusters=2, linkage='ward', connectivity=connectivity)
spectral = cluster.SpectralClustering(n_clusters=2, n_neighbors=20, eigen_solver='arpack', affinity='nearest_neighbors')
dbscan = cluster.DBSCAN(eps=.5)
affinity_propagation = cluster.AffinityPropagation(damping=.95, preference=-200)
average_linkage = cluster.AgglomerativeClustering(linkage='average', affinity='cityblock', n_clusters=2, connectivity=connectivity)

# Fit every model on the same matrix and draw one labelled scatter plot per model
for name, alg in [
    ('MiniBatchKMeans', two_means),
    ('KMeans', kmeans),
    ('AffinityPropagation', affinity_propagation),
    ('MeanShift', ms),
    ('GMM', gmm),
    ('SpectralClustering', spectral),
    ('Ward', ward),
    ('AgglomerativeClustering', average_linkage),
    ('DBSCAN', dbscan)
]:
    alg.fit(X)
    # Models exposing `labels_` after fit are read directly; the others predict
    if hasattr(alg, 'labels_'):
        # builtin int instead of the `np.int` alias, which recent NumPy removed
        df['label'] = alg.labels_.astype(int)
    else:
        df['label'] = alg.predict(X)
    p = ggplot(df, aes(x='utr_s9', y='utr_s9+bcm', color='label')) \
        + geom_point(alpha=0.5) \
        + ggtitle(name) \
        + geom_abline(slope=1, intercept=0, size=2.5, color='#586e75')
    print(p)
/home/ilya/.venv/pydata/lib/python3.4/site-packages/sklearn/neighbors/graph.py:36: DeprecationWarning: The behavior of 'kneighbors_graph' when mode='connectivity' will change in version 0.18. Presently, the nearest neighbor of each sample is the sample itself. Beginning in version 0.18, the default behavior will be to exclude each sample from being its own nearest neighbor. To maintain the current behavior, set include_self=True.
"behavior, set include_self=True.", DeprecationWarning)
/home/ilya/.venv/pydata/lib/python3.4/site-packages/sklearn/cluster/k_means_.py:1252: DeprecationWarning: This function is deprecated. Please call randint(0, 1686 + 1) instead
0, n_samples - 1, init_size)
/home/ilya/.venv/pydata/lib/python3.4/site-packages/sklearn/cluster/k_means_.py:593: DeprecationWarning: This function is deprecated. Please call randint(0, 1686 + 1) instead
0, n_samples - 1, init_size)
/home/ilya/.venv/pydata/lib/python3.4/site-packages/sklearn/cluster/k_means_.py:593: DeprecationWarning: This function is deprecated. Please call randint(0, 1686 + 1) instead
0, n_samples - 1, init_size)
/home/ilya/.venv/pydata/lib/python3.4/site-packages/sklearn/cluster/k_means_.py:593: DeprecationWarning: This function is deprecated. Please call randint(0, 1686 + 1) instead
0, n_samples - 1, init_size)
/home/ilya/.venv/pydata/lib/python3.4/site-packages/sklearn/cluster/k_means_.py:1301: DeprecationWarning: This function is deprecated. Please call randint(0, 1686 + 1) instead
0, n_samples - 1, self.batch_size)
/home/ilya/.venv/pydata/lib/python3.4/site-packages/sklearn/cluster/k_means_.py:1301: DeprecationWarning: This function is deprecated. Please call randint(0, 1686 + 1) instead
0, n_samples - 1, self.batch_size)
/home/ilya/.venv/pydata/lib/python3.4/site-packages/sklearn/cluster/k_means_.py:1301: DeprecationWarning: This function is deprecated. Please call randint(0, 1686 + 1) instead
0, n_samples - 1, self.batch_size)
/home/ilya/.venv/pydata/lib/python3.4/site-packages/sklearn/cluster/k_means_.py:1301: DeprecationWarning: This function is deprecated. Please call randint(0, 1686 + 1) instead
0, n_samples - 1, self.batch_size)
/home/ilya/.venv/pydata/lib/python3.4/site-packages/sklearn/cluster/k_means_.py:1301: DeprecationWarning: This function is deprecated. Please call randint(0, 1686 + 1) instead
0, n_samples - 1, self.batch_size)
/home/ilya/.venv/pydata/lib/python3.4/site-packages/sklearn/cluster/k_means_.py:1301: DeprecationWarning: This function is deprecated. Please call randint(0, 1686 + 1) instead
0, n_samples - 1, self.batch_size)
/home/ilya/.venv/pydata/lib/python3.4/site-packages/sklearn/cluster/k_means_.py:1301: DeprecationWarning: This function is deprecated. Please call randint(0, 1686 + 1) instead
0, n_samples - 1, self.batch_size)
/home/ilya/.venv/pydata/lib/python3.4/site-packages/sklearn/cluster/k_means_.py:1301: DeprecationWarning: This function is deprecated. Please call randint(0, 1686 + 1) instead
0, n_samples - 1, self.batch_size)
/home/ilya/.venv/pydata/lib/python3.4/site-packages/sklearn/cluster/k_means_.py:1301: DeprecationWarning: This function is deprecated. Please call randint(0, 1686 + 1) instead
0, n_samples - 1, self.batch_size)
/home/ilya/.venv/pydata/lib/python3.4/site-packages/sklearn/cluster/k_means_.py:1301: DeprecationWarning: This function is deprecated. Please call randint(0, 1686 + 1) instead
0, n_samples - 1, self.batch_size)
/home/ilya/.venv/pydata/lib/python3.4/site-packages/sklearn/cluster/k_means_.py:1301: DeprecationWarning: This function is deprecated. Please call randint(0, 1686 + 1) instead
0, n_samples - 1, self.batch_size)
<ggplot: (-9223363253352715057)>
<ggplot: (8783583147219)>
<ggplot: (8783577909645)>
<ggplot: (-9223363253282665065)>
<ggplot: (8783569536618)>
<ggplot: (-9223363253285253979)>
<ggplot: (8783566966683)>
<ggplot: (8783566966690)>
<ggplot: (8783583156032)>
In [45]:
# Standardised utr_s9 / utr_s9+bcm matrix.
# `.values` replaces DataFrame.as_matrix(), which current pandas no longer provides.
X = df[['utr_s9', 'utr_s9+bcm']].values
X = StandardScaler().fit_transform(X)
covar_type = ['spherical', 'diag', 'tied', 'full']


def rho_size(rec):
    '''Text label for a row: 'rpoS' for an rpoS row with UTR_length > 500, '' otherwise.'''
    if rec['gene'] == 'rpoS' and rec['UTR_length'] > 500:
        return 'rpoS'
    else:
        return ''


# The text labels do not depend on the fitted model, so build them once
# instead of re-running the row-wise apply for every covariance type.
rho_labels = df.apply(rho_size, axis=1)

# One two-component GMM and one plot per covariance type
for covar in covar_type:
    gmm = mixture.GMM(n_components=2, covariance_type=covar)
    gmm.fit(X)
    df['label'] = gmm.predict(X)
    # Assigned after 'label' so the column order of `df` stays label, rho
    df['rho'] = rho_labels
    # NOTE(review): the axis titles lack a closing parenthesis, and the plotted
    # values are log10 of length-normalised counts rather than RPKM - confirm wording.
    p = ggplot(df, aes(x='utr_s9', y='utr_s9+bcm', color='label', label='rho')) \
        + geom_point(alpha=0.24, size=5) \
        + geom_text(color="black") \
        + geom_abline(slope=1, intercept=0, size=2.5, color='#586e75') \
        + scale_x_continuous(name="-BCM (log(RPKM)") \
        + scale_y_continuous(name="+BCM (log(RPKM)") \
        + theme(axis_title=element_text(size=20),
                axis_text=element_text(size=20))
    print(p)
<ggplot: (-9223363253305990319)>
<ggplot: (-9223363253305931674)>
<ggplot: (-9223363253310980629)>
<ggplot: (-9223363253310931638)>
In [46]:
# Show the per-sample table, now including the 'label' and 'rho' columns
df
Out[46]:
gene
UTR_length
orf_s17+bcm
utr_s17+bcm
orf_s9+bcm
utr_s9+bcm
orf_s19
utr_s19
orf_s17
utr_s17
orf_s9
utr_s9
orf_s19+bcm
utr_s19+bcm
label
rho
0
yaaX
204
2.708846
0.336283
2.695919
0.247275
2.347330
-0.040117
2.403121
0.262079
2.430559
0.278642
2.886491
0.357356
1
1
yaaA
128
2.938269
0.256402
2.862131
0.180592
2.883945
0.096910
2.958803
0.281956
2.930440
0.236199
3.122380
0.432493
1
2
yaaA
156
2.938269
0.199572
2.862131
0.116506
2.883945
0.015048
2.958803
0.202202
2.930440
0.161944
3.122380
0.375077
1
3
yaaW
186
3.303952
0.639776
3.237292
0.536667
1.778151
-1.014240
1.740363
-0.889302
1.694605
-1.052029
3.678427
0.913899
0
4
yaaI
127
2.975891
0.491693
2.877083
0.414710
1.380211
-1.082614
1.612784
-0.836632
1.505150
-0.913472
3.217747
0.548443
0
5
yaaI
152
2.975891
0.493935
2.877083
0.397368
1.380211
-1.121146
1.612784
-0.903090
1.505150
-0.926571
3.217747
0.555747
0
6
dnaK
115
4.921624
1.762711
4.822005
1.618775
4.925830
1.564357
4.642825
1.520740
4.652493
1.332965
5.249165
2.117559
1
7
nhaA
172
3.623818
0.969811
3.533009
0.795880
3.015360
-0.212276
3.039811
0.005021
3.135451
-0.024675
4.024998
1.177100
0
8
rpsT
132
4.374583
1.885721
4.342205
1.840087
4.684401
2.194230
4.651273
2.171582
4.771216
2.320020
4.075273
1.683940
1
9
ileS
558
4.047664
0.705459
4.004063
0.602352
4.399579
0.973649
4.242777
0.994123
4.268601
0.927446
4.382755
1.030067
1
10
ileS
357
4.047664
0.823726
4.004063
0.722297
4.399579
1.111927
4.242777
1.113475
4.268601
1.045520
4.382755
1.187537
1
11
ileS
162
4.047664
1.092924
4.004063
0.983888
4.399579
1.395413
4.242777
1.388673
4.268601
1.318179
4.382755
1.485002
1
12
lspA
193
3.214446
0.871899
3.182985
0.808564
3.576341
1.153143
3.397505
1.084308
3.505828
1.125894
3.469085
1.145324
1
13
dapB
86
3.072434
0.211630
2.939769
0.014892
3.327461
0.186075
3.382737
0.343111
3.215770
0.226870
3.284769
0.382520
1
14
carA
100
3.066512
0.122216
3.012204
-0.026872
2.939769
-0.019997
2.683047
-0.207608
2.790637
-0.055517
3.277036
0.320146
1
15
caiF
82
2.871281
0.226065
2.666518
-0.016187
1.841985
-0.752446
1.911158
-0.783480
1.949390
-0.474481
3.349860
0.676694
0
16
caiE
128
2.716421
0.389720
2.601517
0.258278
1.822822
-0.851937
1.845098
-1.028029
1.894870
-1.028029
3.236411
0.832559
0
17
caiT
106
3.000000
-0.243550
2.804139
-0.396917
1.447158
-1.849215
1.525045
-1.724276
1.406540
-1.849215
3.257198
0.069863
0
18
yaaU
215
3.132900
0.431364
2.951095
0.206010
1.698970
-1.311249
1.883661
-1.101990
1.848189
-1.235528
3.497068
0.739259
0
19
kefF
166
2.877659
0.102111
2.708421
-0.041131
2.131939
-0.581619
2.285557
-0.384418
2.204120
-0.464233
3.149065
0.433587
0
20
apaG
428
3.241422
0.907318
3.180126
0.875940
3.258637
0.903280
3.209247
0.842481
3.263873
0.888712
3.421933
1.039405
1
21
rsmA
158
3.625878
0.943732
3.595276
0.908383
3.668712
1.031025
3.561876
0.993910
3.636137
1.091823
3.768638
1.085886
1
22
lptD
132
3.867379
0.995535
3.811474
0.893737
4.130028
1.122836
4.141246
1.258369
4.113308
1.230739
3.991293
1.040944
1
23
lptD
227
3.867379
0.788237
3.811474
0.685169
4.130028
0.916628
4.141246
1.042955
4.113308
1.019180
3.991293
0.839320
1
24
djlA
96
3.058995
0.250725
3.026125
0.112898
3.001517
0.246898
2.901458
0.152861
2.921946
0.200999
3.299507
0.396127
1
25
rluA
104
3.373096
0.911874
3.360972
0.894391
3.430398
1.014981
3.242293
0.749008
3.285557
0.894657
3.414973
0.988362
1
26
araC
146
3.018284
0.278910
2.873902
0.208559
2.419129
-0.242666
2.887898
0.177082
2.275311
-0.375478
3.206961
0.670386
0
27
thiQ
158
2.881955
0.456481
2.793790
0.409869
2.356026
0.069687
2.344392
0.041892
2.370143
0.113097
3.105169
0.654432
0
28
leuD
141
2.470557
-0.209700
2.419129
-0.248852
2.235528
-0.259917
2.105510
-0.459023
2.209515
-0.378367
2.764550
0.153977
1
29
leuL
316
2.181844
-0.149439
2.084576
-0.301030
2.288920
-0.431501
2.619093
-0.060354
2.359835
-0.275672
2.372912
0.076654
1
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
1657
yjiY
88
4.658188
1.594028
4.514016
1.358713
4.949766
1.630532
4.725209
1.560192
4.571528
1.195239
4.837756
1.731754
1
1658
tsr
245
4.325238
1.213707
4.211734
1.023882
2.461649
-0.703424
2.818226
-0.328468
2.542825
-0.579606
4.627064
1.384803
0
1659
yjjL
115
4.298569
1.382643
4.209729
1.209398
2.605844
-0.447914
2.739968
-0.200360
2.591621
-0.671532
4.866160
1.941058
0
1660
yjjL
122
4.298569
1.362810
4.209729
1.187104
2.605844
-0.473576
2.739968
-0.226022
2.591621
-0.679820
4.866160
1.917122
0
1661
yjjL
160
4.298569
1.333447
4.209729
1.160056
2.605844
-0.522879
2.739968
-0.274701
2.591621
-0.666301
4.866160
1.842785
0
1662
yjjM
96
4.277552
1.225498
4.113459
1.067334
2.343409
-0.602060
2.663701
-0.079181
2.263636
-0.739233
4.450780
1.386202
0
1663
yjjA
148
3.948780
1.469275
3.884654
1.387907
2.962132
0.276896
2.966611
0.257062
3.013890
0.334888
3.810904
1.378619
0
1664
dnaT
98
4.072673
1.816478
3.983942
1.737696
2.828338
0.491648
2.810569
0.509148
2.916717
0.630988
3.986144
1.708698
0
1665
dnaT
98
4.072673
1.816478
3.983942
1.737696
2.828338
0.491648
2.810569
0.509148
2.916717
0.630988
3.986144
1.708698
0
1666
yjjB
227
3.806858
1.384495
3.748769
1.401977
1.763428
-0.759429
1.897627
-0.539785
1.694605
-0.732777
3.665206
1.229435
0
1667
yjjB
227
3.806858
1.384495
3.748769
1.401977
1.763428
-0.759429
1.897627
-0.539785
1.694605
-0.732777
3.665206
1.229435
0
1668
yjjP
176
4.563665
1.782265
4.429203
1.607242
2.382917
-0.397324
2.546543
-0.267789
2.352183
-0.379225
4.377971
1.656436
0
1669
yjjQ
158
4.195651
1.407294
4.053002
1.253436
1.944483
-0.981173
2.146128
-0.767293
1.934498
-0.943385
4.156110
1.316886
0
1670
fhuF
89
4.646315
2.278317
4.550400
2.133718
3.503109
1.062603
3.383726
0.999756
3.355930
1.023969
4.594707
2.229443
0
1671
leuV
108
2.957368
2.124516
3.168055
2.485051
2.804480
2.422319
2.584896
2.441753
2.732796
2.585722
3.103462
2.116549
1
1672
rsmC
168
3.074816
0.637120
3.080085
0.636822
3.452706
0.892459
3.356217
0.878152
3.512217
1.073544
3.207904
0.685048
1
1673
osmY
243
3.722181
0.933812
3.635584
0.840607
2.414973
-0.354198
2.489255
-0.184209
2.482874
-0.165498
3.893318
1.032612
0
1674
deoC
644
3.610820
0.283133
3.367356
0.096370
3.513617
-0.182545
3.962227
0.343555
3.434489
-0.200895
3.819544
0.591479
0
1675
yjjJ
112
3.315656
0.732178
3.232742
0.783929
2.959041
1.033208
3.125969
1.138866
2.867467
1.163236
3.555215
0.952516
1
1676
lplA
121
3.049024
0.541497
3.032014
0.438353
2.863620
0.161492
2.683047
0.063343
2.596597
-0.031633
3.368752
0.766941
0
1677
yjjX
154
3.261739
0.805254
3.237418
0.719891
2.896526
0.615936
2.991226
0.818731
2.983852
0.730510
3.488762
1.026130
1
1678
creA
278
2.965437
0.569845
2.896526
0.457686
2.802432
0.571734
2.779596
0.582897
2.664172
0.573614
3.302980
0.837670
1
1679
arcA
202
3.695919
1.036281
3.680653
0.938926
4.071072
1.321963
4.022057
1.375437
4.048073
1.329378
4.256261
1.558863
1
1680
arcA
206
3.695919
1.029541
3.680653
0.932015
4.071072
1.314420
4.022057
1.367736
4.048073
1.321968
4.256261
1.551888
1
1681
arcA
229
3.695919
0.992347
3.680653
0.895558
4.071072
1.275246
4.022057
1.329740
4.048073
1.281936
4.256261
1.521635
1
1682
arcA
293
3.695919
0.916499
3.680653
0.823278
4.071072
1.194282
4.022057
1.253953
4.048073
1.196503
4.256261
1.486385
1
1683
arcA
375
3.695919
0.819982
3.680653
0.721646
4.071072
1.092534
4.022057
1.154241
4.048073
1.094960
4.256261
1.382305
1
1684
arcA
382
3.695919
0.812826
3.680653
0.713943
4.071072
1.084969
4.022057
1.146777
4.048073
1.087207
4.256261
1.374465
1
1685
arcA
495
3.695919
0.709972
3.680653
0.609023
4.071072
0.979613
4.022057
1.040754
4.048073
0.980072
4.256261
1.266412
1
1686
yjtD
104
2.469085
0.028290
2.356981
-0.017033
2.563481
0.212136
2.336460
0.150284
2.304275
0.032185
2.826399
0.369466
1
1687 rows × 16 columns
In [48]:
# UTR values and cluster label of the alaC row(s)
df.loc[df['gene'] == 'alaC', ['utr_s9', 'utr_s9+bcm', 'label']]
Out[48]:
utr_s9
utr_s9+bcm
label
868
0.124416
0.314394
0
In [49]:
# Persist the per-sample table; note the file is tab-separated despite the .csv extension
df.to_csv('../results/utr_bcm.csv', sep='\t')
In [25]:
# Two-component full-covariance GMM on the utr_s9 / utr_s9+bcm values
# (standardisation is switched off here).
# `.values` replaces DataFrame.as_matrix(), which current pandas no longer provides.
X = df[['utr_s9', 'utr_s9+bcm']].values
#X = StandardScaler().fit_transform(X)
gmm = mixture.GMM(n_components=2, covariance_type='full')
gmm.fit(X)
# Overwrites any 'label' column left by an earlier clustering cell
df['label'] = gmm.predict(X)
df
Out[25]:
gene
UTR_length
orf_s17+bcm
utr_s17+bcm
orf_s9+bcm
utr_s9+bcm
orf_s19
utr_s19
orf_s17
utr_s17
orf_s9
utr_s9
orf_s19+bcm
utr_s19+bcm
label
0
yaaX
204
2.708846
0.336283
2.695919
0.247275
2.347330
-0.040117
2.403121
0.262079
2.430559
0.278642
2.886491
0.357356
1
1
yaaA
128
2.938269
0.256402
2.862131
0.180592
2.883945
0.096910
2.958803
0.281956
2.930440
0.236199
3.122380
0.432493
1
2
yaaA
156
2.938269
0.199572
2.862131
0.116506
2.883945
0.015048
2.958803
0.202202
2.930440
0.161944
3.122380
0.375077
1
3
yaaW
186
3.303952
0.639776
3.237292
0.536667
1.778151
-1.014240
1.740363
-0.889302
1.694605
-1.052029
3.678427
0.913899
0
4
yaaI
127
2.975891
0.491693
2.877083
0.414710
1.380211
-1.082614
1.612784
-0.836632
1.505150
-0.913472
3.217747
0.548443
0
5
yaaI
152
2.975891
0.493935
2.877083
0.397368
1.380211
-1.121146
1.612784
-0.903090
1.505150
-0.926571
3.217747
0.555747
0
6
dnaK
115
4.921624
1.762711
4.822005
1.618775
4.925830
1.564357
4.642825
1.520740
4.652493
1.332965
5.249165
2.117559
1
7
nhaA
172
3.623818
0.969811
3.533009
0.795880
3.015360
-0.212276
3.039811
0.005021
3.135451
-0.024675
4.024998
1.177100
0
8
rpsT
132
4.374583
1.885721
4.342205
1.840087
4.684401
2.194230
4.651273
2.171582
4.771216
2.320020
4.075273
1.683940
1
9
ileS
558
4.047664
0.705459
4.004063
0.602352
4.399579
0.973649
4.242777
0.994123
4.268601
0.927446
4.382755
1.030067
1
10
ileS
357
4.047664
0.823726
4.004063
0.722297
4.399579
1.111927
4.242777
1.113475
4.268601
1.045520
4.382755
1.187537
1
11
ileS
162
4.047664
1.092924
4.004063
0.983888
4.399579
1.395413
4.242777
1.388673
4.268601
1.318179
4.382755
1.485002
1
12
lspA
193
3.214446
0.871899
3.182985
0.808564
3.576341
1.153143
3.397505
1.084308
3.505828
1.125894
3.469085
1.145324
1
13
dapB
86
3.072434
0.211630
2.939769
0.014892
3.327461
0.186075
3.382737
0.343111
3.215770
0.226870
3.284769
0.382520
1
14
carA
100
3.066512
0.122216
3.012204
-0.026872
2.939769
-0.019997
2.683047
-0.207608
2.790637
-0.055517
3.277036
0.320146
1
15
caiF
82
2.871281
0.226065
2.666518
-0.016187
1.841985
-0.752446
1.911158
-0.783480
1.949390
-0.474481
3.349860
0.676694
0
16
caiE
128
2.716421
0.389720
2.601517
0.258278
1.822822
-0.851937
1.845098
-1.028029
1.894870
-1.028029
3.236411
0.832559
0
17
caiT
106
3.000000
-0.243550
2.804139
-0.396917
1.447158
-1.849215
1.525045
-1.724276
1.406540
-1.849215
3.257198
0.069863
0
18
yaaU
215
3.132900
0.431364
2.951095
0.206010
1.698970
-1.311249
1.883661
-1.101990
1.848189
-1.235528
3.497068
0.739259
0
19
kefF
166
2.877659
0.102111
2.708421
-0.041131
2.131939
-0.581619
2.285557
-0.384418
2.204120
-0.464233
3.149065
0.433587
0
20
apaG
428
3.241422
0.907318
3.180126
0.875940
3.258637
0.903280
3.209247
0.842481
3.263873
0.888712
3.421933
1.039405
1
21
rsmA
158
3.625878
0.943732
3.595276
0.908383
3.668712
1.031025
3.561876
0.993910
3.636137
1.091823
3.768638
1.085886
1
22
lptD
132
3.867379
0.995535
3.811474
0.893737
4.130028
1.122836
4.141246
1.258369
4.113308
1.230739
3.991293
1.040944
1
23
lptD
227
3.867379
0.788237
3.811474
0.685169
4.130028
0.916628
4.141246
1.042955
4.113308
1.019180
3.991293
0.839320
1
24
djlA
96
3.058995
0.250725
3.026125
0.112898
3.001517
0.246898
2.901458
0.152861
2.921946
0.200999
3.299507
0.396127
1
25
rluA
104
3.373096
0.911874
3.360972
0.894391
3.430398
1.014981
3.242293
0.749008
3.285557
0.894657
3.414973
0.988362
1
26
araC
146
3.018284
0.278910
2.873902
0.208559
2.419129
-0.242666
2.887898
0.177082
2.275311
-0.375478
3.206961
0.670386
0
27
thiQ
158
2.881955
0.456481
2.793790
0.409869
2.356026
0.069687
2.344392
0.041892
2.370143
0.113097
3.105169
0.654432
0
28
leuD
141
2.470557
-0.209700
2.419129
-0.248852
2.235528
-0.259917
2.105510
-0.459023
2.209515
-0.378367
2.764550
0.153977
1
29
leuL
316
2.181844
-0.149439
2.084576
-0.301030
2.288920
-0.431501
2.619093
-0.060354
2.359835
-0.275672
2.372912
0.076654
1
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
1657
yjiY
88
4.658188
1.594028
4.514016
1.358713
4.949766
1.630532
4.725209
1.560192
4.571528
1.195239
4.837756
1.731754
1
1658
tsr
245
4.325238
1.213707
4.211734
1.023882
2.461649
-0.703424
2.818226
-0.328468
2.542825
-0.579606
4.627064
1.384803
0
1659
yjjL
115
4.298569
1.382643
4.209729
1.209398
2.605844
-0.447914
2.739968
-0.200360
2.591621
-0.671532
4.866160
1.941058
0
1660
yjjL
122
4.298569
1.362810
4.209729
1.187104
2.605844
-0.473576
2.739968
-0.226022
2.591621
-0.679820
4.866160
1.917122
0
1661
yjjL
160
4.298569
1.333447
4.209729
1.160056
2.605844
-0.522879
2.739968
-0.274701
2.591621
-0.666301
4.866160
1.842785
0
1662
yjjM
96
4.277552
1.225498
4.113459
1.067334
2.343409
-0.602060
2.663701
-0.079181
2.263636
-0.739233
4.450780
1.386202
0
1663
yjjA
148
3.948780
1.469275
3.884654
1.387907
2.962132
0.276896
2.966611
0.257062
3.013890
0.334888
3.810904
1.378619
0
1664
dnaT
98
4.072673
1.816478
3.983942
1.737696
2.828338
0.491648
2.810569
0.509148
2.916717
0.630988
3.986144
1.708698
0
1665
dnaT
98
4.072673
1.816478
3.983942
1.737696
2.828338
0.491648
2.810569
0.509148
2.916717
0.630988
3.986144
1.708698
0
1666
yjjB
227
3.806858
1.384495
3.748769
1.401977
1.763428
-0.759429
1.897627
-0.539785
1.694605
-0.732777
3.665206
1.229435
0
1667
yjjB
227
3.806858
1.384495
3.748769
1.401977
1.763428
-0.759429
1.897627
-0.539785
1.694605
-0.732777
3.665206
1.229435
0
1668
yjjP
176
4.563665
1.782265
4.429203
1.607242
2.382917
-0.397324
2.546543
-0.267789
2.352183
-0.379225
4.377971
1.656436
0
1669
yjjQ
158
4.195651
1.407294
4.053002
1.253436
1.944483
-0.981173
2.146128
-0.767293
1.934498
-0.943385
4.156110
1.316886
0
1670
fhuF
89
4.646315
2.278317
4.550400
2.133718
3.503109
1.062603
3.383726
0.999756
3.355930
1.023969
4.594707
2.229443
0
1671
leuV
108
2.957368
2.124516
3.168055
2.485051
2.804480
2.422319
2.584896
2.441753
2.732796
2.585722
3.103462
2.116549
1
1672
rsmC
168
3.074816
0.637120
3.080085
0.636822
3.452706
0.892459
3.356217
0.878152
3.512217
1.073544
3.207904
0.685048
1
1673
osmY
243
3.722181
0.933812
3.635584
0.840607
2.414973
-0.354198
2.489255
-0.184209
2.482874
-0.165498
3.893318
1.032612
0
1674
deoC
644
3.610820
0.283133
3.367356
0.096370
3.513617
-0.182545
3.962227
0.343555
3.434489
-0.200895
3.819544
0.591479
0
1675
yjjJ
112
3.315656
0.732178
3.232742
0.783929
2.959041
1.033208
3.125969
1.138866
2.867467
1.163236
3.555215
0.952516
1
1676
lplA
121
3.049024
0.541497
3.032014
0.438353
2.863620
0.161492
2.683047
0.063343
2.596597
-0.031633
3.368752
0.766941
0
1677
yjjX
154
3.261739
0.805254
3.237418
0.719891
2.896526
0.615936
2.991226
0.818731
2.983852
0.730510
3.488762
1.026130
1
1678
creA
278
2.965437
0.569845
2.896526
0.457686
2.802432
0.571734
2.779596
0.582897
2.664172
0.573614
3.302980
0.837670
1
1679
arcA
202
3.695919
1.036281
3.680653
0.938926
4.071072
1.321963
4.022057
1.375437
4.048073
1.329378
4.256261
1.558863
1
1680
arcA
206
3.695919
1.029541
3.680653
0.932015
4.071072
1.314420
4.022057
1.367736
4.048073
1.321968
4.256261
1.551888
1
1681
arcA
229
3.695919
0.992347
3.680653
0.895558
4.071072
1.275246
4.022057
1.329740
4.048073
1.281936
4.256261
1.521635
1
1682
arcA
293
3.695919
0.916499
3.680653
0.823278
4.071072
1.194282
4.022057
1.253953
4.048073
1.196503
4.256261
1.486385
1
1683
arcA
375
3.695919
0.819982
3.680653
0.721646
4.071072
1.092534
4.022057
1.154241
4.048073
1.094960
4.256261
1.382305
1
1684
arcA
382
3.695919
0.812826
3.680653
0.713943
4.071072
1.084969
4.022057
1.146777
4.048073
1.087207
4.256261
1.374465
1
1685
arcA
495
3.695919
0.709972
3.680653
0.609023
4.071072
0.979613
4.022057
1.040754
4.048073
0.980072
4.256261
1.266412
1
1686
yjtD
104
2.469085
0.028290
2.356981
-0.017033
2.563481
0.212136
2.336460
0.150284
2.304275
0.032185
2.826399
0.369466
1
1687 rows × 15 columns
In [59]:
# Show the two s9 UTR columns and the cluster label for every rpoS record.
df.loc[df['gene'] == 'rpoS', ['utr_s9', 'utr_s9+bcm', 'label']]
Out[59]:
utr_s9
utr_s9+bcm
label
985
0.454030
0.360994
1
986
0.397940
0.306022
1
987
0.353966
0.172502
1
In [27]:
# Write the labelled table to disk. Note: tab-separated output under a .csv file name.
df.to_csv(path_or_buf='../results/utr_bcm.csv', sep='\t')
In [57]:
# Rows assigned to mixture component 1 whose 'utr_s9+bcm' value exceeds 'utr_s9'.
in_component_one = df['label'] == 1
bcm_above_untreated = df['utr_s9'] < df['utr_s9+bcm']
df[in_component_one & bcm_above_untreated]
Out[57]:
gene
UTR_length
orf_s17+bcm
utr_s17+bcm
orf_s9+bcm
utr_s9+bcm
orf_s19
utr_s19
orf_s17
utr_s17
orf_s9
utr_s9
orf_s19+bcm
utr_s19+bcm
label
rho
6
dnaK
115
4.921624
1.762711
4.822005
1.618775
4.925830
1.564357
4.642825
1.520740
4.652493
1.332965
5.249165
2.117559
1
14
carA
100
3.066512
0.122216
3.012204
-0.026872
2.939769
-0.019997
2.683047
-0.207608
2.790637
-0.055517
3.277036
0.320146
1
28
leuD
141
2.470557
-0.209700
2.419129
-0.248852
2.235528
-0.259917
2.105510
-0.459023
2.209515
-0.378367
2.764550
0.153977
1
82
yafK
188
3.426674
0.647529
3.325310
0.552241
3.316704
0.449298
3.274620
0.471307
3.284769
0.519283
3.613419
0.794584
1
86
proB
172
3.563422
0.723513
3.508866
0.640978
3.440594
0.295950
3.470190
0.485044
3.476687
0.551932
3.721316
0.859292
1
87
proB
85
3.563422
0.983865
3.508866
0.905955
3.440594
0.601421
3.470190
0.790327
3.476687
0.855198
3.721316
1.121927
1
118
yahM
152
3.052694
1.153514
2.923503
1.028876
1.897627
0.596669
2.341435
0.930929
2.062582
0.812913
3.133539
1.092430
1
168
ybaB
367
3.483516
0.846280
3.408325
0.893291
3.492551
0.869184
3.346451
0.771093
3.329296
0.847458
3.512951
0.858744
1
169
htpG
303
4.694824
1.356020
4.623596
1.302032
4.552984
1.242751
4.307785
1.209196
4.315404
1.206265
5.011251
1.478862
1
173
ybaP
182
3.088136
0.462151
3.010724
0.280883
2.678973
0.143049
2.791691
0.514810
2.629410
0.175295
3.493458
0.703716
1
174
ybaP
184
3.088136
0.468781
3.010724
0.282341
2.678973
0.151823
2.791691
0.518013
2.629410
0.181564
3.493458
0.705529
1
175
ybaP
204
3.088136
0.478538
3.010724
0.278642
2.678973
0.160927
2.791691
0.532667
2.629410
0.183130
3.493458
0.707612
1
177
ybbN
211
3.731186
0.646993
3.636789
0.466355
3.697796
0.405287
3.543571
0.432734
3.504063
0.361011
4.135959
0.927356
1
178
ybbN
289
3.731186
0.587155
3.636789
0.444358
3.697796
0.356337
3.543571
0.386366
3.504063
0.340849
4.135959
0.860804
1
179
ybbO
180
3.161218
0.267823
3.118099
0.233278
3.016824
0.079181
3.007534
0.277482
3.004751
0.231866
3.342620
0.537819
1
249
kdpE
161
2.528274
0.044812
2.457125
-0.029289
2.243038
-0.215600
2.281033
-0.071693
2.310693
-0.159551
2.782831
0.380885
1
251
rhsC
204
2.878234
-0.496717
2.743902
-0.593627
2.033424
-0.977192
2.167317
-0.791116
2.019116
-0.811320
3.161967
-0.357807
1
254
ybgI
208
2.854610
0.148063
2.760045
-0.062791
2.981366
-0.035895
3.204527
0.422694
2.865104
-0.081274
3.225180
0.417935
1
259
gltA
202
3.747412
0.463287
3.667873
0.461433
4.225710
1.000645
3.981161
0.866083
3.638389
0.400085
4.200632
0.987901
1
260
gltA
299
3.747412
0.329830
3.667873
0.312851
4.225710
0.832359
3.981161
0.730615
3.638389
0.251056
4.200632
0.836929
1
263
sucA
120
4.199467
0.973513
4.177883
0.907590
4.408816
1.154323
4.365638
1.197510
4.140272
0.906021
4.303962
1.068495
1
264
sucA
120
4.199467
0.973513
4.177883
0.907590
4.408816
1.154323
4.365638
1.197510
4.140272
0.906021
4.303962
1.068495
1
314
ybjS
145
3.223496
0.257761
3.126294
0.058740
3.016407
-0.197580
2.993657
0.021902
3.046105
-0.100670
3.449941
0.408006
1
322
clpA
184
3.941710
0.393671
3.938545
0.382076
3.885220
0.379621
3.871076
0.562874
3.765892
0.311524
4.048364
0.622237
1
323
clpA
172
3.941710
0.408910
3.938545
0.402961
3.885220
0.399452
3.871076
0.585330
3.765892
0.331498
4.048364
0.644141
1
350
ycaI
82
3.037227
0.437402
2.916717
0.265163
2.305351
0.072958
2.376577
0.133461
2.331427
0.247554
3.232742
0.367220
1
359
pqiA
342
3.367076
0.345070
3.327052
0.299758
3.070776
0.268063
2.994537
0.243038
3.064458
0.275197
3.328889
0.329297
1
360
pqiA
335
3.367076
0.351750
3.327052
0.305864
3.070776
0.269792
2.994537
0.247644
3.064458
0.277044
3.328889
0.332288
1
369
hyaA
155
2.008600
-0.588272
1.860338
-0.959883
1.041393
-1.260913
1.290035
-0.986212
1.146128
-1.315270
2.429752
-0.148939
1
398
ymdA
189
2.208173
-0.183040
2.115611
-0.251156
1.662758
-0.633009
1.763428
-0.509306
1.851258
-0.395648
2.429752
0.009096
1
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
...
1455
aspT
92
2.080987
2.193412
1.908485
2.152438
2.102091
2.218912
2.095169
2.348939
1.774517
2.135737
1.648360
1.588333
1
1456
trpT
177
2.982723
1.986671
3.301464
1.976366
3.045519
2.019210
3.016824
2.134449
3.193542
1.953260
2.525045
1.417092
1
1469
cyaA
363
4.078294
0.622651
4.041866
0.518550
3.921400
0.553201
3.809795
0.584668
3.842297
0.494515
4.380528
0.915183
1
1470
cyaA
154
4.078294
0.904146
4.041866
0.812696
3.921400
0.876188
3.809795
0.861697
3.842297
0.807676
4.380528
1.227202
1
1471
corA
232
3.975822
1.312028
3.927345
1.220367
3.377033
0.853179
3.459242
0.939648
3.563837
1.058004
4.091614
1.390730
1
1480
rfaH
144
3.217484
0.685182
3.167908
0.570802
3.066699
0.483607
2.968483
0.548782
3.113442
0.542341
3.309949
0.881844
1
1484
mobA
122
2.798996
0.373033
2.723866
0.291129
2.100371
0.124494
2.183270
0.338522
1.937016
0.255075
2.701568
0.354549
1
1504
yiiG
121
3.087781
0.091856
2.923503
-0.171628
1.748188
-0.503002
1.944483
-0.564271
1.785330
-0.237687
3.437195
0.266492
1
1517
yijO
112
2.997168
0.487340
2.783546
0.392477
2.115611
-0.005856
2.403121
0.281196
2.089905
0.267800
3.219715
0.645826
1
1521
btuB
240
3.530136
0.871184
3.412293
0.802916
3.465160
0.797325
3.428783
0.921036
3.346255
0.783099
3.876737
1.218251
1
1527
secE
181
4.181400
1.854256
4.215307
1.893231
4.092966
1.663748
3.955760
1.633887
4.079272
1.761479
3.927422
1.498767
1
1530
rpoB
182
5.252414
1.969546
5.252573
1.966902
5.080502
1.830081
5.092110
1.891513
5.154594
1.936671
5.119121
1.775158
1
1533
rsd
146
3.142546
0.382806
3.013048
0.189756
2.868056
0.143143
3.267875
0.410835
2.828338
0.094524
3.347037
0.600570
1
1577
proP
182
3.804889
0.884347
4.057514
1.247784
3.946157
1.201727
3.376212
0.682681
3.766859
1.074583
4.296347
1.469012
1
1578
proP
95
3.804889
1.160105
4.057514
1.527766
3.946157
1.482874
3.376212
0.963042
3.766859
1.356026
4.296347
1.749126
1
1588
groL
242
5.011257
1.871252
4.935107
1.777672
5.027376
1.877876
4.854731
1.783576
4.851475
1.660842
5.294230
2.171352
1
1589
groL
152
5.011257
1.932116
4.935107
1.842724
5.027376
1.966281
4.854731
1.848999
4.851475
1.733187
5.294230
2.265221
1
1604
nsrR
146
3.286793
1.099874
3.154728
1.024154
2.598791
0.746005
2.644439
0.857663
2.516535
0.882727
3.530136
1.416971
1
1610
ytfB
102
3.101231
0.465616
3.049412
0.398791
2.948168
0.349335
2.934751
0.362468
2.885926
0.301030
3.294687
0.649888
1
1611
cycA
84
3.626340
0.898542
3.591343
0.823909
3.818787
1.008702
3.357935
0.815688
3.207769
0.617923
3.911104
1.210694
1
1615
msrA
85
3.024280
0.659972
2.915927
0.502748
2.652730
0.227433
2.902818
0.696921
2.677607
0.352750
3.165541
0.674807
1
1619
yjgA
181
3.427405
0.825824
3.362482
0.573871
3.516866
0.422657
3.470337
0.480309
3.518646
0.503873
3.440752
0.642689
1
1620
yjgA
200
3.427405
0.803969
3.362482
0.554186
3.516866
0.404834
3.470337
0.454464
3.518646
0.478566
3.440752
0.627878
1
1621
yjgA
216
3.427405
0.787762
3.362482
0.538285
3.516866
0.388180
3.470337
0.428600
3.518646
0.453361
3.440752
0.612244
1
1627
yjgN
164
2.893762
0.159904
2.650308
-0.076541
1.267172
-0.752446
1.484300
-0.277828
1.332438
-0.225839
3.123852
0.340251
1
1628
yjgM
134
3.071698
0.232731
2.975891
0.040213
2.619615
-0.415298
2.690196
-0.049737
2.821514
-0.044319
3.258158
0.417583
1
1638
fecE
182
3.151523
0.503731
3.066885
0.394105
3.043362
0.304003
2.852175
0.292597
2.936011
0.313380
3.314183
0.574985
1
1639
fecE
187
3.151523
0.501579
3.066885
0.396078
3.043362
0.301610
2.852175
0.299285
2.936011
0.318666
3.314183
0.569831
1
1655
yjiA
130
3.834134
1.327752
3.670014
1.223915
3.725952
1.190332
3.619146
1.239877
3.392785
0.954614
4.012753
1.519070
1
1657
yjiY
88
4.658188
1.594028
4.514016
1.358713
4.949766
1.630532
4.725209
1.560192
4.571528
1.195239
4.837756
1.731754
1
194 rows × 16 columns
In [63]:
# Map every sample title to its pair of barcodes
# (the pair is treated as replicates R1/R2 further down).
samples = {
    's9': ['ATCACG', 'ACAGTG'],
    's9+bcm': ['CGATGT', 'GCCAAT'],
    's17': ['TTAGGC', 'GATCAG'],
    's17+bcm': ['TGACCA', 'TAGCTT'],
    's19': ['CAGATC', 'GGCTAC'],
    's19+bcm': ['ACTTGA', 'CTTGTA'],
}
# Per-barcode UTR values of the rpoS records for the 's9' and 's9+bcm' barcodes.
dfm.loc[
    dfm['gene'] == 'rpoS',
    ['gene', 'UTR_length', 'utr_ATCACG', 'utr_ACAGTG', 'utr_CGATGT', 'utr_GCCAAT'],
]
Out[63]:
gene
UTR_length
utr_ATCACG
utr_ACAGTG
utr_CGATGT
utr_GCCAAT
985
rpoS
103
207.0
379.0
170.0
303.0
986
rpoS
173
376.0
489.0
221.0
479.0
987
rpoS
567
971.0
1591.0
708.0
979.0
In [64]:
# rpoS records with the barcode columns relabelled as <sample>R<replicate>.
# `rename` returns a new frame, so `dfm` itself is left untouched.
rpoS = (
    dfm.loc[
        dfm['gene'] == 'rpoS',
        ['gene', 'UTR_length', 'utr_ATCACG', 'utr_ACAGTG', 'utr_CGATGT', 'utr_GCCAAT'],
    ]
    .rename(columns={
        'utr_ATCACG': 'utr_s9R1',
        'utr_ACAGTG': 'utr_s9R2',
        'utr_CGATGT': 'utr_s9+bcmR1',
        'utr_GCCAAT': 'utr_s9+bcmR2',
    })
)
rpoS
Out[64]:
gene
UTR_length
utr_s9R1
utr_s9R2
utr_s9+bcmR1
utr_s9+bcmR2
985
rpoS
103
207.0
379.0
170.0
303.0
986
rpoS
173
376.0
489.0
221.0
479.0
987
rpoS
567
971.0
1591.0
708.0
979.0
In [70]:
# Reshape the per-barcode UTR columns into long form: one row per record and
# sample, with the log10 values of the sample's barcodes in columns R1, R2, ...
drep = dfm[['gene', 'UTR_length']]


def _sample_frame(name, codes):
    """Return a copy of `drep` with log10 `utr_<barcode>` columns for one sample.

    `codes` are the sample's barcodes; the i-th barcode (1-based) becomes column
    `R<i>`, and `name` is stored in the `sample` column.
    """
    frame = drep.copy()  # copy so the column assignments never touch `dfm`
    for rep_no, code in enumerate(codes, start=1):
        frame['R{}'.format(rep_no)] = np.log10(dfm['utr_{}'.format(code)])
    frame['sample'] = name
    return frame


res = [_sample_frame(name, codes) for name, codes in samples.items()]
# The original row index is kept, so index values repeat once per sample.
long = pd.concat(res)
long
Out[70]:
gene
UTR_length
R1
R2
sample
0
yaaX
204
2.642465
2.649335
s17+bcm
1
yaaA
128
2.257679
2.448706
s17+bcm
2
yaaA
156
2.281033
2.481443
s17+bcm
3
yaaW
186
2.753583
3.023664
s17+bcm
4
yaaI
127
2.589950
2.600973
s17+bcm
5
yaaI
152
2.641474
2.707570
s17+bcm
6
dnaK
115
3.802774
3.843108
s17+bcm
7
nhaA
172
3.114277
3.280578
s17+bcm
8
rpsT
132
4.037904
3.972203
s17+bcm
9
ileS
558
3.399501
3.498999
s17+bcm
10
ileS
357
3.310268
3.433770
s17+bcm
11
ileS
162
3.234264
3.361350
s17+bcm
12
lspA
193
3.072617
3.228400
s17+bcm
13
dapB
86
2.152288
2.139879
s17+bcm
14
carA
100
2.093422
2.149219
s17+bcm
15
caiF
82
2.008600
2.240549
s17+bcm
16
caiE
128
2.305351
2.629410
s17+bcm
17
caiT
106
1.518514
1.944483
s17+bcm
18
yaaU
215
2.474216
2.936011
s17+bcm
19
kefF
166
2.008600
2.502427
s17+bcm
20
apaG
428
3.481729
3.589167
s17+bcm
21
rsmA
158
2.992111
3.253822
s17+bcm
22
lptD
132
3.014940
3.198107
s17+bcm
23
lptD
227
3.054996
3.218273
s17+bcm
24
djlA
96
2.201397
2.262451
s17+bcm
25
rluA
104
2.945469
2.911690
s17+bcm
26
araC
146
2.305351
2.547775
s17+bcm
27
thiQ
158
2.552668
2.737987
s17+bcm
28
leuD
141
1.690196
2.096910
s17+bcm
29
leuL
316
2.255273
2.428135
s17+bcm
...
...
...
...
...
...
1657
yjiY
88
3.460296
3.819807
s19+bcm
1658
tsr
245
3.765669
3.782114
s19+bcm
1659
yjjL
115
3.995591
4.007833
s19+bcm
1660
yjjL
122
3.997430
4.009451
s19+bcm
1661
yjjL
160
4.044618
4.049179
s19+bcm
1662
yjjM
96
3.319314
3.412629
s19+bcm
1663
yjjA
148
3.470851
3.615003
s19+bcm
1664
dnaT
98
3.688509
3.711048
s19+bcm
1665
dnaT
98
3.688509
3.711048
s19+bcm
1666
yjjB
227
3.507451
3.651569
s19+bcm
1667
yjjB
227
3.507451
3.651569
s19+bcm
1668
yjjP
176
3.868468
3.933031
s19+bcm
1669
yjjQ
158
3.509471
3.521530
s19+bcm
1670
fhuF
89
4.195927
4.161038
s19+bcm
1671
leuV
108
4.081419
4.209166
s19+bcm
1672
rsmC
168
2.994757
2.805501
s19+bcm
1673
osmY
243
3.466719
3.363612
s19+bcm
1674
deoC
644
3.401745
3.398981
s19+bcm
1675
yjjJ
112
2.984527
3.018284
s19+bcm
1676
lplA
121
2.833784
2.865104
s19+bcm
1677
yjjX
154
3.219323
3.207904
s19+bcm
1678
creA
278
3.267406
3.295567
s19+bcm
1679
arcA
202
3.962985
3.736157
s19+bcm
1680
arcA
206
3.964401
3.737908
s19+bcm
1681
arcA
229
3.980367
3.753200
s19+bcm
1682
arcA
293
4.050534
3.827692
s19+bcm
1683
arcA
375
4.053002
3.831806
s19+bcm
1684
arcA
382
4.053194
3.831998
s19+bcm
1685
arcA
495
4.056371
3.838660
s19+bcm
1686
yjtD
104
2.290035
2.465383
s19+bcm
10122 rows × 5 columns
In [73]:
# Replicate 1 vs replicate 2 scatter, one facet per sample.
# The faint diagonal is the y = x line (slope 1, intercept 0).
p = (
    ggplot(long, aes(x='R1', y='R2'))
    + geom_point(size=2.5, alpha=0.1)
    + geom_abline(slope=1, intercept=0, size=.5, alpha=0.4, color='#586e75')
    + facet_wrap('sample')
)
print(p)
<ggplot: (8783501883256)>
In [ ]:
Content source: eco32i/rpoS
Similar notebooks: