In [1]:
%load_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
#from __future__ import division
import pandas as pd
import numpy as np
from ggplot import *

import os
import sys

utils_path = os.path.abspath(os.path.join('..'))
if utils_path not in sys.path:
    sys.path.append(utils_path)
    
from utils.rpos import *

In [3]:
# Sample name -> barcodes of the libraries that are averaged together for it.
#   s9     : WT
#   s9+bcm : WT +BCM
#   s17    : triple sRNA mutant
samples = dict([
    ('s9', ['ATCACG', 'ACAGTG']),
    ('s9+bcm', ['CGATGT', 'GCCAAT']),
    ('s17', ['TTAGGC', 'GATCAG']),
])

# Every barcode used in the analysis, listed sample by sample.
barcodes = [
    'ATCACG', 'ACAGTG',
    'CGATGT', 'GCCAAT',
    'TTAGGC', 'GATCAG',
]

In [4]:
def get_counts_df(barcodes, res_dir='../results'):
    '''
    Aggregates htseq-count results into raw counts dataframe.

    Looks at the files directly inside `res_dir` whose name ends with '.counts'
    and, for every barcode in `barcodes`, reads the first one (in sorted name
    order) whose name contains that barcode. Each file is parsed as a
    headerless, tab-separated (gene, counts) table.

    The first file read fixes the gene list and the row order. Counts from the
    other files are matched to it by gene name, so the files do not have to
    list the genes in the same order. A gene missing from a file gets NaN in
    that barcode's column; if a reordered file repeats a gene name, its first
    count is used.

    Returns a dataframe with a `gene` column plus one counts column per barcode,
    without the rows whose gene name starts with '__'. For an empty `barcodes`
    an empty dataframe holding only the `gene` column is returned.

    Raises IndexError when no '.counts' file name contains a barcode.
    '''
    d, _, filenames = next(os.walk(res_dir))
    # Sorted so that the file picked for a barcode does not depend on directory order
    infiles = sorted(f for f in filenames if f.endswith('.counts'))

    df = None
    for barcode in barcodes:
        matches = [f for f in infiles if barcode in f]
        if not matches:
            raise IndexError(
                "no '.counts' file containing barcode {0!r} found in {1!r}".format(barcode, res_dir))
        cntfile = os.path.join(d, matches[0])
        df_ = pd.read_csv(cntfile, sep='\t', header=None, names=['gene', 'counts'])

        if df is None:
            df = df_[['gene']].copy()

        if df_['gene'].equals(df['gene']):
            # Same genes in the same order: plain positional assignment
            df[barcode] = df_['counts']
        else:
            # Different order or gene set: look every count up by gene name
            # instead of trusting the row position
            by_gene = df_.drop_duplicates('gene').set_index('gene')['counts']
            df[barcode] = df['gene'].map(by_gene)

    if df is None:
        return pd.DataFrame(columns=['gene'])
    return df[~df['gene'].str.startswith('__')]

def get_utr_counts_df(df, barcodes, res_dir='../results',
                      ref='gi|556503834|ref|NC_000913.3|', key5='start', key3='end'):
    '''
    Adds one `utr_<barcode>` column per barcode to `df`, in place.

    Looks at the files directly inside `res_dir` whose name ends with
    '_sorted.bam' and, for every barcode in `barcodes`, passes the first one
    (in sorted name order) whose name contains that barcode to `get_counts`.

    df       -- dataframe with one row per 5'UTR; modified in place
    barcodes -- barcodes to process
    res_dir  -- directory holding the BAM files
    ref      -- reference sequence name handed to `get_counts`
    key5     -- column of `df` handed to `get_counts` as `key5`
    key3     -- column of `df` handed to `get_counts` as `key3`

    NOTE(review): `get_counts` is not defined in this notebook (presumably it
    comes from utils.rpos and counts reads between the two coordinates) --
    confirm against that module.

    Returns None. Raises IndexError when no '_sorted.bam' file name contains
    a barcode.
    '''
    d, _, filenames = next(os.walk(res_dir))
    # Sorted so that the file picked for a barcode does not depend on directory order
    infiles = sorted(f for f in filenames if f.endswith('_sorted.bam'))
    for barcode in barcodes:
        matches = [f for f in infiles if barcode in f]
        if not matches:
            raise IndexError(
                "no '_sorted.bam' file containing barcode {0!r} found in {1!r}".format(barcode, res_dir))
        bamfile = os.path.join(d, matches[0])
        df['utr_{0}'.format(barcode)] = get_counts(df, bamfile, ref, key5=key5, key3=key3)

In [6]:
# htseq counts for genes (orfs)
count_df = get_counts_df(barcodes, res_dir='../results')

# 5' UTR annotations: the 2nd, 3rd and 4th BED fields are start, end and gene name
res = []
with open('../ref/utrs_corr.bed', 'r') as fi:
    for line in fi:
        fields = line.split()
        # Blank lines and BED header/comment lines carry no interval; skip them
        # instead of failing with an IndexError/ValueError
        if not fields or fields[0] in ('track', 'browser') or fields[0].startswith('#'):
            continue
        res.append({
                'gene': fields[3],
                'start': int(fields[1]),
                'end': int(fields[2]),
            })

df_utr5 = pd.DataFrame.from_records(res)
df_utr5['UTR_length'] = abs(df_utr5['end'] - df_utr5['start'])

# Keep only UTRs strictly longer than this many bases
min_utr_length = 80
df_utr5 = df_utr5[df_utr5['UTR_length'] > min_utr_length]

# Merge the two on gene column (merge returns a new frame, df_utr5 is left untouched)
dfm = df_utr5.merge(count_df, on='gene')

# Count UTR hits and add them to the dataframe (dfm is modified in place)
get_utr_counts_df(dfm, barcodes, res_dir='../results')

In [7]:
# Display the merged table: UTR annotation, per-barcode gene counts and the utr_<barcode> columns.
dfm


Out[7]:
end gene start UTR_length ATCACG ACAGTG CGATGT GCCAAT TTAGGC GATCAG ... utr_CGATGT utr_GCCAAT utr_TTAGGC utr_GATCAG utr_TGACCA utr_TAGCTT utr_CAGATC utr_GGCTAC utr_ACTTGA utr_CTTGTA
0 5234 yaaX 5030 204 258 281 512 481 178 328 ... 351.0 370.0 298.0 448.0 439.0 446.0 137.0 235.0 479.0 450.0
1 6587 yaaA 6459 128 885 819 688 768 677 1142 ... 163.0 225.0 175.0 315.0 181.0 281.0 206.0 114.0 388.0 305.0
2 6615 yaaA 6459 156 885 819 688 768 677 1142 ... 172.0 236.0 175.0 322.0 191.0 303.0 207.0 116.0 411.0 329.0
3 11542 yaaW 11356 186 36 63 1512 1942 36 74 ... 504.0 776.0 12.0 36.0 567.0 1056.0 12.0 24.0 1505.0 1546.0
4 11913 yaaI 11786 127 24 40 581 926 31 51 ... 304.0 356.0 14.0 23.0 389.0 399.0 8.0 13.0 458.0 440.0
5 11938 yaaI 11786 152 24 40 581 926 31 51 ... 340.0 419.0 14.0 24.0 438.0 510.0 8.0 15.0 565.0 528.0
6 12163 dnaK 12048 115 36348 53503 62444 70306 39515 48358 ... 2977.0 6584.0 4799.0 2830.0 6350.0 6968.0 2336.0 6099.0 15976.0 14174.0
7 17489 nhaA 17317 172 1646 1086 2994 3830 826 1366 ... 792.0 1358.0 133.0 215.0 1301.0 1908.0 74.0 137.0 2949.0 2223.0
8 21210 rpsT 21078 132 67237 50862 25383 18595 27532 62067 ... 9302.0 8966.0 14726.0 24465.0 10912.0 9380.0 23735.0 17554.0 7045.0 5706.0
9 22391 ileS 21833 558 20377 16745 9326 10862 15253 19726 ... 1739.0 2728.0 5454.0 5556.0 2509.0 3155.0 4785.0 5718.0 6382.0 5578.0
10 22391 ileS 22034 357 20377 16745 9326 10862 15253 19726 ... 1369.0 2398.0 4524.0 4748.0 2043.0 2715.0 4188.0 5051.0 5825.0 5171.0
11 22391 ileS 22229 162 20377 16745 9326 10862 15253 19726 ... 1043.0 2079.0 3899.0 4030.0 1715.0 2298.0 3665.0 4388.0 5149.0 4749.0
12 25207 lspA 25014 193 2524 3886 1573 1475 1645 3350 ... 1189.0 1295.0 1949.0 2738.0 1182.0 1692.0 2208.0 3284.0 2990.0 2404.0
13 28374 dapB 28288 86 1753 1534 570 1171 2320 2508 ... 75.0 103.0 122.0 257.0 142.0 138.0 133.0 131.0 194.0 221.0
14 29651 carA 29551 100 706 529 938 1119 348 616 ... 72.0 116.0 61.0 63.0 124.0 141.0 130.0 61.0 206.0 212.0
15 34300 caiF 34218 82 67 111 279 649 68 95 ... 39.0 119.0 10.0 17.0 102.0 174.0 15.0 14.0 441.0 338.0
16 35499 caiE 35371 128 60 97 288 511 33 107 ... 204.0 260.0 5.0 19.0 202.0 426.0 17.0 19.0 908.0 833.0
17 42037 caiT 41931 106 17 34 452 822 21 46 ... 22.0 63.0 1.0 3.0 33.0 88.0 1.0 2.0 105.0 144.0
18 45807 yaaU 45592 215 40 101 621 1166 23 130 ... 320.0 371.0 5.0 29.0 298.0 863.0 2.0 19.0 1138.0 1221.0
19 47246 kefF 47080 166 142 178 381 641 168 218 ... 114.0 188.0 36.0 101.0 102.0 318.0 40.0 47.0 481.0 420.0
20 52034 apaG 51606 428 1989 1683 1450 1578 983 2255 ... 3292.0 3141.0 2270.0 3686.0 3032.0 3883.0 3699.0 3152.0 4924.0 4449.0
21 52588 rsmA 52430 158 4636 4017 3939 3937 2355 4938 ... 1129.0 1430.0 905.0 2211.0 982.0 1794.0 1888.0 1506.0 2132.0 1719.0
22 57241 lptD 57109 132 14342 11620 5975 6982 10598 17089 ... 996.0 1071.0 2016.0 2770.0 1035.0 1578.0 2130.0 1373.0 1517.0 1384.0
23 57336 lptD 57109 227 14342 11620 5975 6982 10598 17089 ... 1047.0 1152.0 2130.0 2882.0 1135.0 1653.0 2279.0 1468.0 1641.0 1495.0
24 57364 djlA 57268 96 921 750 1022 1102 664 930 ... 103.0 146.0 134.0 139.0 159.0 183.0 205.0 134.0 231.0 247.0
25 60450 rluA 60346 104 2420 1440 2574 2018 1580 1914 ... 884.0 747.0 544.0 623.0 882.0 816.0 1444.0 709.0 1012.0 1013.0
26 70387 araC 70241 146 208 169 659 837 838 707 ... 177.0 295.0 252.0 187.0 202.0 353.0 53.0 114.0 614.0 753.0
27 73085 thiQ 72927 158 227 242 567 677 138 304 ... 349.0 463.0 114.0 234.0 357.0 547.0 173.0 198.0 698.0 728.0
28 79594 leuD 79453 141 120 204 217 308 72 183 ... 71.0 88.0 31.0 67.0 49.0 125.0 62.0 93.0 221.0 181.0
29 84024 leuL 83708 316 128 330 93 150 332 500 ... 94.0 222.0 256.0 294.0 180.0 268.0 79.0 155.0 420.0 334.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1657 4591367 yjiY 4591279 88 3923 70646 17206 48114 34677 71551 ... 966.0 3054.0 2274.0 4119.0 1932.0 4979.0 175.0 7342.0 2886.0 6604.0
1658 4591657 tsr 4591412 245 142 556 16167 16399 215 1101 ... 1993.0 3184.0 46.0 184.0 2796.0 5219.0 25.0 72.0 5830.0 6055.0
1659 4594837 yjjL 4594722 115 178 603 15365 17051 238 861 ... 1382.0 2343.0 32.0 113.0 2653.0 2898.0 12.0 70.0 9899.0 10182.0
1660 4594844 yjjL 4594722 122 178 603 15365 17051 238 861 ... 1391.0 2363.0 32.0 113.0 2684.0 2942.0 12.0 70.0 9941.0 10220.0
1661 4594882 yjjL 4594722 160 178 603 15365 17051 238 861 ... 1680.0 2946.0 44.0 126.0 3153.0 3743.0 15.0 81.0 11082.0 11199.0
1662 4595947 yjjM 4595851 96 70 297 10638 15333 407 515 ... 794.0 1448.0 104.0 56.0 1681.0 1546.0 12.0 36.0 2086.0 2586.0
1663 4600337 yjjA 4600189 148 935 1130 8250 7085 673 1179 ... 3669.0 3562.0 236.0 299.0 4702.0 4019.0 285.0 275.0 2957.0 4121.0
1664 4601615 dnaT 4601517 98 804 847 9333 9941 364 929 ... 5136.0 5578.0 190.0 443.0 4980.0 7865.0 291.0 317.0 4881.0 5141.0
1665 4601615 dnaT 4601517 98 804 847 9333 9941 364 929 ... 5136.0 5578.0 190.0 443.0 4980.0 7865.0 291.0 317.0 4881.0 5141.0
1666 4602177 yjjB 4601950 227 32 67 5565 5650 74 84 ... 6396.0 5060.0 55.0 76.0 6203.0 4801.0 14.0 65.0 3217.0 4483.0
1667 4602177 yjjB 4601950 227 32 67 5565 5650 74 84 ... 6396.0 5060.0 55.0 76.0 6203.0 4801.0 14.0 65.0 3217.0 4483.0
1668 4603034 yjjP 4602858 176 119 331 24619 29113 234 470 ... 5120.0 9129.0 45.0 145.0 8368.0 12953.0 37.0 104.0 7387.0 8571.0
1669 4603477 yjjQ 4603319 158 29 143 10300 12296 75 205 ... 2605.0 3059.0 19.0 35.0 3157.0 4915.0 4.0 29.0 3232.0 3323.0
1670 4605752 fhuF 4605663 89 1271 3268 36712 34316 1104 3735 ... 11448.0 12770.0 387.0 1392.0 13155.0 20631.0 1013.0 1043.0 15701.0 14489.0
1671 4606273 leuV 4606165 108 502 579 1946 999 321 448 ... 45748.0 20246.0 29439.0 30293.0 20196.0 8576.0 21569.0 35549.0 12062.0 16187.0
1672 4607868 rsmC 4607700 168 3543 2962 1325 1080 1297 3245 ... 842.0 614.0 816.0 1722.0 608.0 849.0 1609.0 1014.0 988.0 639.0
1673 4611396 osmY 4611153 243 314 294 3848 4794 275 342 ... 1584.0 1783.0 104.0 214.0 1769.0 2404.0 100.0 115.0 2929.0 2310.0
1674 4617323 deoC 4616679 644 1894 3545 1746 2914 9587 8747 ... 566.0 1042.0 1599.0 1242.0 1030.0 1442.0 354.0 492.0 2522.0 2506.0
1675 4621769 yjjJ 4621657 112 786 688 1531 1887 1376 1297 ... 692.0 670.0 1131.0 1953.0 538.0 671.0 1041.0 1377.0 965.0 1043.0
1676 4624238 lplA 4624117 121 401 389 1026 1127 431 533 ... 307.0 357.0 130.0 150.0 370.0 472.0 142.0 209.0 682.0 733.0
1677 4633899 yjjX 4633745 154 943 984 1712 1743 784 1176 ... 697.0 919.0 972.0 1057.0 873.0 1094.0 668.0 604.0 1657.0 1614.0
1678 4635521 creA 4635243 278 430 493 567 1009 568 636 ... 569.0 1026.0 873.0 1255.0 877.0 1188.0 763.0 1311.0 1851.0 1975.0
1679 4640508 arcA 4640306 202 11171 11170 4073 5514 7081 13961 ... 1236.0 2274.0 4149.0 5441.0 1986.0 2406.0 4439.0 4040.0 9183.0 5447.0
1680 4640512 arcA 4640306 206 11171 11170 4073 5514 7081 13961 ... 1241.0 2282.0 4156.0 5452.0 1994.0 2416.0 4447.0 4051.0 9213.0 5469.0
1681 4640535 arcA 4640306 229 11171 11170 4073 5514 7081 13961 ... 1269.0 2332.0 4276.0 5510.0 2024.0 2476.0 4499.0 4133.0 9558.0 5665.0
1682 4640599 arcA 4640306 293 11171 11170 4073 5514 7081 13961 ... 1352.0 2549.0 4610.0 5906.0 2182.0 2653.0 4713.0 4453.0 11234.0 6725.0
1683 4640681 arcA 4640306 375 11171 11170 4073 5514 7081 13961 ... 1367.0 2584.0 4689.0 6009.0 2245.0 2710.0 4779.0 4502.0 11298.0 6789.0
1684 4640688 arcA 4640306 382 11171 11170 4073 5514 7081 13961 ... 1367.0 2587.0 4694.0 6018.0 2252.0 2713.0 4786.0 4505.0 11303.0 6792.0
1685 4640801 arcA 4640306 495 11171 11170 4073 5514 7081 13961 ... 1384.0 2640.0 4770.0 6104.0 2287.0 2790.0 4864.0 4582.0 11386.0 6897.0
1686 4640942 yjtD 4640838 104 216 187 165 290 187 247 ... 53.0 147.0 171.0 123.0 102.0 120.0 166.0 173.0 195.0 292.0

1687 rows × 28 columns


In [8]:
# Save the raw merged table. Note: the file is tab-separated despite its .csv extension.
dfm.to_csv('../results/dfm_raw.csv', sep='\t')

In [42]:
#id_vars = ['TSS','TU_name','coord_5','coord_3','gene', 'UTR_length']
# Annotation columns copied unchanged into the per-sample summary table.
id_vars = ['gene', 'UTR_length']
# NOTE(review): value_vars is not used anywhere in this cell, and it lists samples
# (s19, s17+bcm, s19+bcm) that the `samples` dict does not define -- confirm it is still needed.
value_vars = ['s9','s17','s19','s9+bcm','s17+bcm','s19+bcm']

# Work on a copy so that the raw counts in dfm stay unchanged.
dfn = dfm.copy()

def pseudo_counts(x):
    '''Returns `x + 1` when `x` equals zero, otherwise `x` unchanged.'''
    if x == 0:
        return x + 1
    return x

# Per-barcode normalisation helpers for ORF and UTR counts
def norm_orf(barcode, rec):
    '''
    Returns the value stored under `barcode` in `rec`, unchanged.
    The division by gene length is currently disabled (commented-out line below).
    '''
    #return float(rec[barcode] / abs(rec['first_gene_5'] - rec['first_gene_3']))
    orf_value = rec[barcode]
    return orf_value

def norm_utr(barcode, rec):
    '''
    Returns the `utr_<barcode>` value of `rec` divided by its `UTR_length`, as a float.
    '''
    utr_key = 'utr_{0}'.format(barcode)
    per_base = rec[utr_key] / rec['UTR_length']
    return float(per_base)

# Build the per-barcode columns with column arithmetic rather than one Python
# call per row; the resulting values are the same.
for barcode in barcodes:
    utr_col = 'utr_{0}'.format(barcode)
    # Pseudo-count: a zero ORF count becomes 1 so that log10 below stays finite
    dfn[barcode] = dfn[barcode].mask(dfn[barcode] == 0, 1)
    # ORF counts are used as they are (no length normalisation)
    dfn['orf_{0}'.format(barcode)] = dfn[barcode]
    # UTR counts per base of UTR; astype(float) forces true division
    dfn[utr_col] = dfn[utr_col].astype(float) / dfn['UTR_length']
    # NOTE(review): UTR counts get no pseudo-count, so a UTR with zero counts in
    # every replicate of a sample ends up as -inf after log10 -- confirm this is intended.


df = dfn[id_vars].copy()
# Take means across replicates according to the samples dict
for sample, bcs in samples.items():
    orf_cols = ['orf_{0}'.format(b) for b in bcs]
    utr_cols = ['utr_{0}'.format(b) for b in bcs]
    df['orf_{0}'.format(sample)] = np.log10(dfn[orf_cols].mean(axis=1))
    df['utr_{0}'.format(sample)] = np.log10(dfn[utr_cols].mean(axis=1))
df


Out[42]:
gene UTR_length orf_s17+bcm utr_s17+bcm orf_s9+bcm utr_s9+bcm orf_s19 utr_s19 orf_s17 utr_s17 orf_s9 utr_s9 orf_s19+bcm utr_s19+bcm
0 yaaX 204 2.708846 0.336283 2.695919 0.247275 2.347330 -0.040117 2.403121 0.262079 2.430559 0.278642 2.886491 0.357356
1 yaaA 128 2.938269 0.256402 2.862131 0.180592 2.883945 0.096910 2.958803 0.281956 2.930440 0.236199 3.122380 0.432493
2 yaaA 156 2.938269 0.199572 2.862131 0.116506 2.883945 0.015048 2.958803 0.202202 2.930440 0.161944 3.122380 0.375077
3 yaaW 186 3.303952 0.639776 3.237292 0.536667 1.778151 -1.014240 1.740363 -0.889302 1.694605 -1.052029 3.678427 0.913899
4 yaaI 127 2.975891 0.491693 2.877083 0.414710 1.380211 -1.082614 1.612784 -0.836632 1.505150 -0.913472 3.217747 0.548443
5 yaaI 152 2.975891 0.493935 2.877083 0.397368 1.380211 -1.121146 1.612784 -0.903090 1.505150 -0.926571 3.217747 0.555747
6 dnaK 115 4.921624 1.762711 4.822005 1.618775 4.925830 1.564357 4.642825 1.520740 4.652493 1.332965 5.249165 2.117559
7 nhaA 172 3.623818 0.969811 3.533009 0.795880 3.015360 -0.212276 3.039811 0.005021 3.135451 -0.024675 4.024998 1.177100
8 rpsT 132 4.374583 1.885721 4.342205 1.840087 4.684401 2.194230 4.651273 2.171582 4.771216 2.320020 4.075273 1.683940
9 ileS 558 4.047664 0.705459 4.004063 0.602352 4.399579 0.973649 4.242777 0.994123 4.268601 0.927446 4.382755 1.030067
10 ileS 357 4.047664 0.823726 4.004063 0.722297 4.399579 1.111927 4.242777 1.113475 4.268601 1.045520 4.382755 1.187537
11 ileS 162 4.047664 1.092924 4.004063 0.983888 4.399579 1.395413 4.242777 1.388673 4.268601 1.318179 4.382755 1.485002
12 lspA 193 3.214446 0.871899 3.182985 0.808564 3.576341 1.153143 3.397505 1.084308 3.505828 1.125894 3.469085 1.145324
13 dapB 86 3.072434 0.211630 2.939769 0.014892 3.327461 0.186075 3.382737 0.343111 3.215770 0.226870 3.284769 0.382520
14 carA 100 3.066512 0.122216 3.012204 -0.026872 2.939769 -0.019997 2.683047 -0.207608 2.790637 -0.055517 3.277036 0.320146
15 caiF 82 2.871281 0.226065 2.666518 -0.016187 1.841985 -0.752446 1.911158 -0.783480 1.949390 -0.474481 3.349860 0.676694
16 caiE 128 2.716421 0.389720 2.601517 0.258278 1.822822 -0.851937 1.845098 -1.028029 1.894870 -1.028029 3.236411 0.832559
17 caiT 106 3.000000 -0.243550 2.804139 -0.396917 1.447158 -1.849215 1.525045 -1.724276 1.406540 -1.849215 3.257198 0.069863
18 yaaU 215 3.132900 0.431364 2.951095 0.206010 1.698970 -1.311249 1.883661 -1.101990 1.848189 -1.235528 3.497068 0.739259
19 kefF 166 2.877659 0.102111 2.708421 -0.041131 2.131939 -0.581619 2.285557 -0.384418 2.204120 -0.464233 3.149065 0.433587
20 apaG 428 3.241422 0.907318 3.180126 0.875940 3.258637 0.903280 3.209247 0.842481 3.263873 0.888712 3.421933 1.039405
21 rsmA 158 3.625878 0.943732 3.595276 0.908383 3.668712 1.031025 3.561876 0.993910 3.636137 1.091823 3.768638 1.085886
22 lptD 132 3.867379 0.995535 3.811474 0.893737 4.130028 1.122836 4.141246 1.258369 4.113308 1.230739 3.991293 1.040944
23 lptD 227 3.867379 0.788237 3.811474 0.685169 4.130028 0.916628 4.141246 1.042955 4.113308 1.019180 3.991293 0.839320
24 djlA 96 3.058995 0.250725 3.026125 0.112898 3.001517 0.246898 2.901458 0.152861 2.921946 0.200999 3.299507 0.396127
25 rluA 104 3.373096 0.911874 3.360972 0.894391 3.430398 1.014981 3.242293 0.749008 3.285557 0.894657 3.414973 0.988362
26 araC 146 3.018284 0.278910 2.873902 0.208559 2.419129 -0.242666 2.887898 0.177082 2.275311 -0.375478 3.206961 0.670386
27 thiQ 158 2.881955 0.456481 2.793790 0.409869 2.356026 0.069687 2.344392 0.041892 2.370143 0.113097 3.105169 0.654432
28 leuD 141 2.470557 -0.209700 2.419129 -0.248852 2.235528 -0.259917 2.105510 -0.459023 2.209515 -0.378367 2.764550 0.153977
29 leuL 316 2.181844 -0.149439 2.084576 -0.301030 2.288920 -0.431501 2.619093 -0.060354 2.359835 -0.275672 2.372912 0.076654
... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1657 yjiY 88 4.658188 1.594028 4.514016 1.358713 4.949766 1.630532 4.725209 1.560192 4.571528 1.195239 4.837756 1.731754
1658 tsr 245 4.325238 1.213707 4.211734 1.023882 2.461649 -0.703424 2.818226 -0.328468 2.542825 -0.579606 4.627064 1.384803
1659 yjjL 115 4.298569 1.382643 4.209729 1.209398 2.605844 -0.447914 2.739968 -0.200360 2.591621 -0.671532 4.866160 1.941058
1660 yjjL 122 4.298569 1.362810 4.209729 1.187104 2.605844 -0.473576 2.739968 -0.226022 2.591621 -0.679820 4.866160 1.917122
1661 yjjL 160 4.298569 1.333447 4.209729 1.160056 2.605844 -0.522879 2.739968 -0.274701 2.591621 -0.666301 4.866160 1.842785
1662 yjjM 96 4.277552 1.225498 4.113459 1.067334 2.343409 -0.602060 2.663701 -0.079181 2.263636 -0.739233 4.450780 1.386202
1663 yjjA 148 3.948780 1.469275 3.884654 1.387907 2.962132 0.276896 2.966611 0.257062 3.013890 0.334888 3.810904 1.378619
1664 dnaT 98 4.072673 1.816478 3.983942 1.737696 2.828338 0.491648 2.810569 0.509148 2.916717 0.630988 3.986144 1.708698
1665 dnaT 98 4.072673 1.816478 3.983942 1.737696 2.828338 0.491648 2.810569 0.509148 2.916717 0.630988 3.986144 1.708698
1666 yjjB 227 3.806858 1.384495 3.748769 1.401977 1.763428 -0.759429 1.897627 -0.539785 1.694605 -0.732777 3.665206 1.229435
1667 yjjB 227 3.806858 1.384495 3.748769 1.401977 1.763428 -0.759429 1.897627 -0.539785 1.694605 -0.732777 3.665206 1.229435
1668 yjjP 176 4.563665 1.782265 4.429203 1.607242 2.382917 -0.397324 2.546543 -0.267789 2.352183 -0.379225 4.377971 1.656436
1669 yjjQ 158 4.195651 1.407294 4.053002 1.253436 1.944483 -0.981173 2.146128 -0.767293 1.934498 -0.943385 4.156110 1.316886
1670 fhuF 89 4.646315 2.278317 4.550400 2.133718 3.503109 1.062603 3.383726 0.999756 3.355930 1.023969 4.594707 2.229443
1671 leuV 108 2.957368 2.124516 3.168055 2.485051 2.804480 2.422319 2.584896 2.441753 2.732796 2.585722 3.103462 2.116549
1672 rsmC 168 3.074816 0.637120 3.080085 0.636822 3.452706 0.892459 3.356217 0.878152 3.512217 1.073544 3.207904 0.685048
1673 osmY 243 3.722181 0.933812 3.635584 0.840607 2.414973 -0.354198 2.489255 -0.184209 2.482874 -0.165498 3.893318 1.032612
1674 deoC 644 3.610820 0.283133 3.367356 0.096370 3.513617 -0.182545 3.962227 0.343555 3.434489 -0.200895 3.819544 0.591479
1675 yjjJ 112 3.315656 0.732178 3.232742 0.783929 2.959041 1.033208 3.125969 1.138866 2.867467 1.163236 3.555215 0.952516
1676 lplA 121 3.049024 0.541497 3.032014 0.438353 2.863620 0.161492 2.683047 0.063343 2.596597 -0.031633 3.368752 0.766941
1677 yjjX 154 3.261739 0.805254 3.237418 0.719891 2.896526 0.615936 2.991226 0.818731 2.983852 0.730510 3.488762 1.026130
1678 creA 278 2.965437 0.569845 2.896526 0.457686 2.802432 0.571734 2.779596 0.582897 2.664172 0.573614 3.302980 0.837670
1679 arcA 202 3.695919 1.036281 3.680653 0.938926 4.071072 1.321963 4.022057 1.375437 4.048073 1.329378 4.256261 1.558863
1680 arcA 206 3.695919 1.029541 3.680653 0.932015 4.071072 1.314420 4.022057 1.367736 4.048073 1.321968 4.256261 1.551888
1681 arcA 229 3.695919 0.992347 3.680653 0.895558 4.071072 1.275246 4.022057 1.329740 4.048073 1.281936 4.256261 1.521635
1682 arcA 293 3.695919 0.916499 3.680653 0.823278 4.071072 1.194282 4.022057 1.253953 4.048073 1.196503 4.256261 1.486385
1683 arcA 375 3.695919 0.819982 3.680653 0.721646 4.071072 1.092534 4.022057 1.154241 4.048073 1.094960 4.256261 1.382305
1684 arcA 382 3.695919 0.812826 3.680653 0.713943 4.071072 1.084969 4.022057 1.146777 4.048073 1.087207 4.256261 1.374465
1685 arcA 495 3.695919 0.709972 3.680653 0.609023 4.071072 0.979613 4.022057 1.040754 4.048073 0.980072 4.256261 1.266412
1686 yjtD 104 2.469085 0.028290 2.356981 -0.017033 2.563481 0.212136 2.336460 0.150284 2.304275 0.032185 2.826399 0.369466

1687 rows × 14 columns


In [43]:
# UTR signal of s9 (x) against s9+bcm (y), point size by UTR length,
# with the y = x diagonal drawn for reference.
p = (
    ggplot(df, aes(x='utr_s9', y='utr_s9+bcm', size='UTR_length'))
    + geom_point(alpha=0.1)
    + geom_abline(slope=1, intercept=0, size=2.5, color='#586e75')
)
print(p)


<ggplot: (8783559005255)>

In [44]:
# ORF signal of s9 (x) against s9+bcm (y), with the y = x diagonal drawn for reference.
p = (
    ggplot(df, aes(x='orf_s9', y='orf_s9+bcm'))
    + geom_point(alpha=0.1)
    + geom_abline(slope=1, intercept=0, size=2.5, color='#586e75')
)
print(p)


<ggplot: (-9223363253305905997)>

In [15]:
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import euclidean_distances
from sklearn.neighbors import kneighbors_graph
from sklearn import cluster
from sklearn import mixture

# Feature matrix: log10 UTR signal of s9 and s9+bcm, one row per UTR.
# `.values` is used because DataFrame.as_matrix no longer exists in current pandas.
X = df[['utr_s9', 'utr_s9+bcm']].values
X = StandardScaler().fit_transform(X)

# Fixed seed so that the randomly initialised estimators give the same labels on every run
seed = 0

bandwidth = cluster.estimate_bandwidth(X, quantile=0.3)
# include_self=True keeps the behaviour this cell was run with (every sample is its own
# nearest neighbour); newer scikit-learn releases exclude the sample by default.
connectivity = kneighbors_graph(X, n_neighbors=20, include_self=True)
# Make the neighbour graph symmetric
connectivity = 0.05 * (connectivity + connectivity.T)
#distances = euclidean_distances(X)

# mixture.GMM was replaced by GaussianMixture in scikit-learn 0.18; use whichever exists
gmm_cls = getattr(mixture, 'GaussianMixture', None) or mixture.GMM
gmm = gmm_cls(n_components=2, covariance_type='full', random_state=seed)

ms = cluster.MeanShift(bandwidth=bandwidth, bin_seeding=True)
two_means = cluster.MiniBatchKMeans(n_clusters=2, batch_size=200, random_state=seed)
kmeans = cluster.KMeans(n_clusters=2, random_state=seed)
ward = cluster.AgglomerativeClustering(n_clusters=2, linkage='ward', connectivity=connectivity)
spectral = cluster.SpectralClustering(n_clusters=2, n_neighbors=20, eigen_solver='arpack',
                                      affinity='nearest_neighbors', random_state=seed)
dbscan = cluster.DBSCAN(eps=.5)
affinity_propagation = cluster.AffinityPropagation(damping=.95, preference=-200)
# NOTE(review): recent scikit-learn releases renamed `affinity` to `metric` for
# AgglomerativeClustering -- adjust here when upgrading.
average_linkage = cluster.AgglomerativeClustering(linkage='average', affinity='cityblock', n_clusters=2, connectivity=connectivity)

# Fit every algorithm on the same matrix and plot the points coloured by cluster label
for name, alg in [
                    ('MiniBatchKMeans', two_means),
                    ('KMeans', kmeans),
                    ('AffinityPropagation', affinity_propagation),
                    ('MeanShift', ms),
                    ('GMM', gmm),
                    ('SpectralClustering', spectral),
                    ('Ward', ward),
                    ('AgglomerativeClustering', average_linkage),
                    ('DBSCAN', dbscan)
                ]:
    alg.fit(X)
    if hasattr(alg, 'labels_'):
        # builtin int: the np.int alias has been removed from NumPy
        df['label'] = alg.labels_.astype(int)
    else:
        # estimators without labels_ (the mixture model) assign labels via predict
        df['label'] = alg.predict(X)

    p = ggplot(df, aes(x='utr_s9', y='utr_s9+bcm', color='label')) \
        + geom_point(alpha=0.5) \
        + ggtitle(name) \
        + geom_abline(slope=1, intercept=0, size=2.5, color='#586e75')
    print(p)


/home/ilya/.venv/pydata/lib/python3.4/site-packages/sklearn/neighbors/graph.py:36: DeprecationWarning: The behavior of 'kneighbors_graph' when mode='connectivity' will change in version 0.18. Presently, the nearest neighbor of each sample is the sample itself. Beginning in version 0.18, the default behavior will be to exclude each sample from being its own nearest neighbor. To maintain the current behavior, set include_self=True.
  "behavior, set include_self=True.", DeprecationWarning)
/home/ilya/.venv/pydata/lib/python3.4/site-packages/sklearn/cluster/k_means_.py:1252: DeprecationWarning: This function is deprecated. Please call randint(0, 1686 + 1) instead
  0, n_samples - 1, init_size)
/home/ilya/.venv/pydata/lib/python3.4/site-packages/sklearn/cluster/k_means_.py:593: DeprecationWarning: This function is deprecated. Please call randint(0, 1686 + 1) instead
  0, n_samples - 1, init_size)
/home/ilya/.venv/pydata/lib/python3.4/site-packages/sklearn/cluster/k_means_.py:593: DeprecationWarning: This function is deprecated. Please call randint(0, 1686 + 1) instead
  0, n_samples - 1, init_size)
/home/ilya/.venv/pydata/lib/python3.4/site-packages/sklearn/cluster/k_means_.py:593: DeprecationWarning: This function is deprecated. Please call randint(0, 1686 + 1) instead
  0, n_samples - 1, init_size)
/home/ilya/.venv/pydata/lib/python3.4/site-packages/sklearn/cluster/k_means_.py:1301: DeprecationWarning: This function is deprecated. Please call randint(0, 1686 + 1) instead
  0, n_samples - 1, self.batch_size)
/home/ilya/.venv/pydata/lib/python3.4/site-packages/sklearn/cluster/k_means_.py:1301: DeprecationWarning: This function is deprecated. Please call randint(0, 1686 + 1) instead
  0, n_samples - 1, self.batch_size)
/home/ilya/.venv/pydata/lib/python3.4/site-packages/sklearn/cluster/k_means_.py:1301: DeprecationWarning: This function is deprecated. Please call randint(0, 1686 + 1) instead
  0, n_samples - 1, self.batch_size)
/home/ilya/.venv/pydata/lib/python3.4/site-packages/sklearn/cluster/k_means_.py:1301: DeprecationWarning: This function is deprecated. Please call randint(0, 1686 + 1) instead
  0, n_samples - 1, self.batch_size)
/home/ilya/.venv/pydata/lib/python3.4/site-packages/sklearn/cluster/k_means_.py:1301: DeprecationWarning: This function is deprecated. Please call randint(0, 1686 + 1) instead
  0, n_samples - 1, self.batch_size)
/home/ilya/.venv/pydata/lib/python3.4/site-packages/sklearn/cluster/k_means_.py:1301: DeprecationWarning: This function is deprecated. Please call randint(0, 1686 + 1) instead
  0, n_samples - 1, self.batch_size)
/home/ilya/.venv/pydata/lib/python3.4/site-packages/sklearn/cluster/k_means_.py:1301: DeprecationWarning: This function is deprecated. Please call randint(0, 1686 + 1) instead
  0, n_samples - 1, self.batch_size)
/home/ilya/.venv/pydata/lib/python3.4/site-packages/sklearn/cluster/k_means_.py:1301: DeprecationWarning: This function is deprecated. Please call randint(0, 1686 + 1) instead
  0, n_samples - 1, self.batch_size)
/home/ilya/.venv/pydata/lib/python3.4/site-packages/sklearn/cluster/k_means_.py:1301: DeprecationWarning: This function is deprecated. Please call randint(0, 1686 + 1) instead
  0, n_samples - 1, self.batch_size)
/home/ilya/.venv/pydata/lib/python3.4/site-packages/sklearn/cluster/k_means_.py:1301: DeprecationWarning: This function is deprecated. Please call randint(0, 1686 + 1) instead
  0, n_samples - 1, self.batch_size)
/home/ilya/.venv/pydata/lib/python3.4/site-packages/sklearn/cluster/k_means_.py:1301: DeprecationWarning: This function is deprecated. Please call randint(0, 1686 + 1) instead
  0, n_samples - 1, self.batch_size)
<ggplot: (-9223363253352715057)>
<ggplot: (8783583147219)>
<ggplot: (8783577909645)>
<ggplot: (-9223363253282665065)>
<ggplot: (8783569536618)>
<ggplot: (-9223363253285253979)>
<ggplot: (8783566966683)>
<ggplot: (8783566966690)>
<ggplot: (8783583156032)>

In [45]:
# Standardised feature matrix: log10 UTR signal of s9 and s9+bcm, one row per UTR.
# `.values` is used because DataFrame.as_matrix no longer exists in current pandas.
X = df[['utr_s9', 'utr_s9+bcm']].values
X = StandardScaler().fit_transform(X)
# Covariance structures to compare in the mixture model
covar_type = ['spherical', 'diag', 'tied', 'full']

def rho_size(rec):
    '''
    Text label for a record: 'rpoS' when its gene is rpoS and its UTR_length
    is above 500, an empty string otherwise.
    '''
    is_long_rpos = rec['gene'] == 'rpoS' and rec['UTR_length'] > 500
    return 'rpoS' if is_long_rpos else ''
        

# The text labels do not depend on the covariance type: compute them once
rho_labels = df.apply(rho_size, axis=1)

# mixture.GMM was replaced by GaussianMixture in scikit-learn 0.18; use whichever exists
gmm_cls = getattr(mixture, 'GaussianMixture', None) or mixture.GMM

for covar in covar_type:
    # Fixed seed so that the component labels are the same on every run
    gmm = gmm_cls(n_components=2, covariance_type=covar, random_state=0)
    gmm.fit(X)
    df['label'] = gmm.predict(X)
    df['rho'] = rho_labels

    # Axis titles: closing parenthesis added, the original strings were unbalanced
    p = ggplot(df, aes(x='utr_s9', y='utr_s9+bcm', color='label', label='rho')) \
        + geom_point(alpha=0.24, size=5) \
        + geom_text(color="black") \
        + geom_abline(slope=1, intercept=0, size=2.5, color='#586e75') \
        + scale_x_continuous(name="-BCM (log(RPKM))") \
        + scale_y_continuous(name="+BCM (log(RPKM))") \
        + theme(axis_title=element_text(size=20),
                axis_text=element_text(size=20))
    print(p)


<ggplot: (-9223363253305990319)>
<ggplot: (-9223363253305931674)>
<ggplot: (-9223363253310980629)>
<ggplot: (-9223363253310931638)>

In [46]:
# Display the summary table, now including the `label` and `rho` columns set in the GMM loop above.
df


Out[46]:
gene UTR_length orf_s17+bcm utr_s17+bcm orf_s9+bcm utr_s9+bcm orf_s19 utr_s19 orf_s17 utr_s17 orf_s9 utr_s9 orf_s19+bcm utr_s19+bcm label rho
0 yaaX 204 2.708846 0.336283 2.695919 0.247275 2.347330 -0.040117 2.403121 0.262079 2.430559 0.278642 2.886491 0.357356 1
1 yaaA 128 2.938269 0.256402 2.862131 0.180592 2.883945 0.096910 2.958803 0.281956 2.930440 0.236199 3.122380 0.432493 1
2 yaaA 156 2.938269 0.199572 2.862131 0.116506 2.883945 0.015048 2.958803 0.202202 2.930440 0.161944 3.122380 0.375077 1
3 yaaW 186 3.303952 0.639776 3.237292 0.536667 1.778151 -1.014240 1.740363 -0.889302 1.694605 -1.052029 3.678427 0.913899 0
4 yaaI 127 2.975891 0.491693 2.877083 0.414710 1.380211 -1.082614 1.612784 -0.836632 1.505150 -0.913472 3.217747 0.548443 0
5 yaaI 152 2.975891 0.493935 2.877083 0.397368 1.380211 -1.121146 1.612784 -0.903090 1.505150 -0.926571 3.217747 0.555747 0
6 dnaK 115 4.921624 1.762711 4.822005 1.618775 4.925830 1.564357 4.642825 1.520740 4.652493 1.332965 5.249165 2.117559 1
7 nhaA 172 3.623818 0.969811 3.533009 0.795880 3.015360 -0.212276 3.039811 0.005021 3.135451 -0.024675 4.024998 1.177100 0
8 rpsT 132 4.374583 1.885721 4.342205 1.840087 4.684401 2.194230 4.651273 2.171582 4.771216 2.320020 4.075273 1.683940 1
9 ileS 558 4.047664 0.705459 4.004063 0.602352 4.399579 0.973649 4.242777 0.994123 4.268601 0.927446 4.382755 1.030067 1
10 ileS 357 4.047664 0.823726 4.004063 0.722297 4.399579 1.111927 4.242777 1.113475 4.268601 1.045520 4.382755 1.187537 1
11 ileS 162 4.047664 1.092924 4.004063 0.983888 4.399579 1.395413 4.242777 1.388673 4.268601 1.318179 4.382755 1.485002 1
12 lspA 193 3.214446 0.871899 3.182985 0.808564 3.576341 1.153143 3.397505 1.084308 3.505828 1.125894 3.469085 1.145324 1
13 dapB 86 3.072434 0.211630 2.939769 0.014892 3.327461 0.186075 3.382737 0.343111 3.215770 0.226870 3.284769 0.382520 1
14 carA 100 3.066512 0.122216 3.012204 -0.026872 2.939769 -0.019997 2.683047 -0.207608 2.790637 -0.055517 3.277036 0.320146 1
15 caiF 82 2.871281 0.226065 2.666518 -0.016187 1.841985 -0.752446 1.911158 -0.783480 1.949390 -0.474481 3.349860 0.676694 0
16 caiE 128 2.716421 0.389720 2.601517 0.258278 1.822822 -0.851937 1.845098 -1.028029 1.894870 -1.028029 3.236411 0.832559 0
17 caiT 106 3.000000 -0.243550 2.804139 -0.396917 1.447158 -1.849215 1.525045 -1.724276 1.406540 -1.849215 3.257198 0.069863 0
18 yaaU 215 3.132900 0.431364 2.951095 0.206010 1.698970 -1.311249 1.883661 -1.101990 1.848189 -1.235528 3.497068 0.739259 0
19 kefF 166 2.877659 0.102111 2.708421 -0.041131 2.131939 -0.581619 2.285557 -0.384418 2.204120 -0.464233 3.149065 0.433587 0
20 apaG 428 3.241422 0.907318 3.180126 0.875940 3.258637 0.903280 3.209247 0.842481 3.263873 0.888712 3.421933 1.039405 1
21 rsmA 158 3.625878 0.943732 3.595276 0.908383 3.668712 1.031025 3.561876 0.993910 3.636137 1.091823 3.768638 1.085886 1
22 lptD 132 3.867379 0.995535 3.811474 0.893737 4.130028 1.122836 4.141246 1.258369 4.113308 1.230739 3.991293 1.040944 1
23 lptD 227 3.867379 0.788237 3.811474 0.685169 4.130028 0.916628 4.141246 1.042955 4.113308 1.019180 3.991293 0.839320 1
24 djlA 96 3.058995 0.250725 3.026125 0.112898 3.001517 0.246898 2.901458 0.152861 2.921946 0.200999 3.299507 0.396127 1
25 rluA 104 3.373096 0.911874 3.360972 0.894391 3.430398 1.014981 3.242293 0.749008 3.285557 0.894657 3.414973 0.988362 1
26 araC 146 3.018284 0.278910 2.873902 0.208559 2.419129 -0.242666 2.887898 0.177082 2.275311 -0.375478 3.206961 0.670386 0
27 thiQ 158 2.881955 0.456481 2.793790 0.409869 2.356026 0.069687 2.344392 0.041892 2.370143 0.113097 3.105169 0.654432 0
28 leuD 141 2.470557 -0.209700 2.419129 -0.248852 2.235528 -0.259917 2.105510 -0.459023 2.209515 -0.378367 2.764550 0.153977 1
29 leuL 316 2.181844 -0.149439 2.084576 -0.301030 2.288920 -0.431501 2.619093 -0.060354 2.359835 -0.275672 2.372912 0.076654 1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1657 yjiY 88 4.658188 1.594028 4.514016 1.358713 4.949766 1.630532 4.725209 1.560192 4.571528 1.195239 4.837756 1.731754 1
1658 tsr 245 4.325238 1.213707 4.211734 1.023882 2.461649 -0.703424 2.818226 -0.328468 2.542825 -0.579606 4.627064 1.384803 0
1659 yjjL 115 4.298569 1.382643 4.209729 1.209398 2.605844 -0.447914 2.739968 -0.200360 2.591621 -0.671532 4.866160 1.941058 0
1660 yjjL 122 4.298569 1.362810 4.209729 1.187104 2.605844 -0.473576 2.739968 -0.226022 2.591621 -0.679820 4.866160 1.917122 0
1661 yjjL 160 4.298569 1.333447 4.209729 1.160056 2.605844 -0.522879 2.739968 -0.274701 2.591621 -0.666301 4.866160 1.842785 0
1662 yjjM 96 4.277552 1.225498 4.113459 1.067334 2.343409 -0.602060 2.663701 -0.079181 2.263636 -0.739233 4.450780 1.386202 0
1663 yjjA 148 3.948780 1.469275 3.884654 1.387907 2.962132 0.276896 2.966611 0.257062 3.013890 0.334888 3.810904 1.378619 0
1664 dnaT 98 4.072673 1.816478 3.983942 1.737696 2.828338 0.491648 2.810569 0.509148 2.916717 0.630988 3.986144 1.708698 0
1665 dnaT 98 4.072673 1.816478 3.983942 1.737696 2.828338 0.491648 2.810569 0.509148 2.916717 0.630988 3.986144 1.708698 0
1666 yjjB 227 3.806858 1.384495 3.748769 1.401977 1.763428 -0.759429 1.897627 -0.539785 1.694605 -0.732777 3.665206 1.229435 0
1667 yjjB 227 3.806858 1.384495 3.748769 1.401977 1.763428 -0.759429 1.897627 -0.539785 1.694605 -0.732777 3.665206 1.229435 0
1668 yjjP 176 4.563665 1.782265 4.429203 1.607242 2.382917 -0.397324 2.546543 -0.267789 2.352183 -0.379225 4.377971 1.656436 0
1669 yjjQ 158 4.195651 1.407294 4.053002 1.253436 1.944483 -0.981173 2.146128 -0.767293 1.934498 -0.943385 4.156110 1.316886 0
1670 fhuF 89 4.646315 2.278317 4.550400 2.133718 3.503109 1.062603 3.383726 0.999756 3.355930 1.023969 4.594707 2.229443 0
1671 leuV 108 2.957368 2.124516 3.168055 2.485051 2.804480 2.422319 2.584896 2.441753 2.732796 2.585722 3.103462 2.116549 1
1672 rsmC 168 3.074816 0.637120 3.080085 0.636822 3.452706 0.892459 3.356217 0.878152 3.512217 1.073544 3.207904 0.685048 1
1673 osmY 243 3.722181 0.933812 3.635584 0.840607 2.414973 -0.354198 2.489255 -0.184209 2.482874 -0.165498 3.893318 1.032612 0
1674 deoC 644 3.610820 0.283133 3.367356 0.096370 3.513617 -0.182545 3.962227 0.343555 3.434489 -0.200895 3.819544 0.591479 0
1675 yjjJ 112 3.315656 0.732178 3.232742 0.783929 2.959041 1.033208 3.125969 1.138866 2.867467 1.163236 3.555215 0.952516 1
1676 lplA 121 3.049024 0.541497 3.032014 0.438353 2.863620 0.161492 2.683047 0.063343 2.596597 -0.031633 3.368752 0.766941 0
1677 yjjX 154 3.261739 0.805254 3.237418 0.719891 2.896526 0.615936 2.991226 0.818731 2.983852 0.730510 3.488762 1.026130 1
1678 creA 278 2.965437 0.569845 2.896526 0.457686 2.802432 0.571734 2.779596 0.582897 2.664172 0.573614 3.302980 0.837670 1
1679 arcA 202 3.695919 1.036281 3.680653 0.938926 4.071072 1.321963 4.022057 1.375437 4.048073 1.329378 4.256261 1.558863 1
1680 arcA 206 3.695919 1.029541 3.680653 0.932015 4.071072 1.314420 4.022057 1.367736 4.048073 1.321968 4.256261 1.551888 1
1681 arcA 229 3.695919 0.992347 3.680653 0.895558 4.071072 1.275246 4.022057 1.329740 4.048073 1.281936 4.256261 1.521635 1
1682 arcA 293 3.695919 0.916499 3.680653 0.823278 4.071072 1.194282 4.022057 1.253953 4.048073 1.196503 4.256261 1.486385 1
1683 arcA 375 3.695919 0.819982 3.680653 0.721646 4.071072 1.092534 4.022057 1.154241 4.048073 1.094960 4.256261 1.382305 1
1684 arcA 382 3.695919 0.812826 3.680653 0.713943 4.071072 1.084969 4.022057 1.146777 4.048073 1.087207 4.256261 1.374465 1
1685 arcA 495 3.695919 0.709972 3.680653 0.609023 4.071072 0.979613 4.022057 1.040754 4.048073 0.980072 4.256261 1.266412 1
1686 yjtD 104 2.469085 0.028290 2.356981 -0.017033 2.563481 0.212136 2.336460 0.150284 2.304275 0.032185 2.826399 0.369466 1

1687 rows × 16 columns


In [48]:
# Show the s9 / s9+bcm UTR values and the assigned label for the rows of gene alaC.
df[df['gene'] == 'alaC'][['utr_s9','utr_s9+bcm', 'label']]


Out[48]:
utr_s9 utr_s9+bcm label
868 0.124416 0.314394 0

In [49]:
# Save the summary table. Note: the file is tab-separated despite its .csv extension.
df.to_csv('../results/utr_bcm.csv', sep='\t')

In [25]:
# Unscaled feature matrix: log10 UTR signal of s9 and s9+bcm, one row per UTR.
# `.values` is used because DataFrame.as_matrix no longer exists in current pandas.
X = df[['utr_s9', 'utr_s9+bcm']].values
#X = StandardScaler().fit_transform(X)

# mixture.GMM was replaced by GaussianMixture in scikit-learn 0.18; use whichever exists
gmm_cls = getattr(mixture, 'GaussianMixture', None) or mixture.GMM
# Fixed seed so that the component labels are the same on every run
gmm = gmm_cls(n_components=2, covariance_type='full', random_state=0)
gmm.fit(X)
df['label'] = gmm.predict(X)
df


Out[25]:
gene UTR_length orf_s17+bcm utr_s17+bcm orf_s9+bcm utr_s9+bcm orf_s19 utr_s19 orf_s17 utr_s17 orf_s9 utr_s9 orf_s19+bcm utr_s19+bcm label
0 yaaX 204 2.708846 0.336283 2.695919 0.247275 2.347330 -0.040117 2.403121 0.262079 2.430559 0.278642 2.886491 0.357356 1
1 yaaA 128 2.938269 0.256402 2.862131 0.180592 2.883945 0.096910 2.958803 0.281956 2.930440 0.236199 3.122380 0.432493 1
2 yaaA 156 2.938269 0.199572 2.862131 0.116506 2.883945 0.015048 2.958803 0.202202 2.930440 0.161944 3.122380 0.375077 1
3 yaaW 186 3.303952 0.639776 3.237292 0.536667 1.778151 -1.014240 1.740363 -0.889302 1.694605 -1.052029 3.678427 0.913899 0
4 yaaI 127 2.975891 0.491693 2.877083 0.414710 1.380211 -1.082614 1.612784 -0.836632 1.505150 -0.913472 3.217747 0.548443 0
5 yaaI 152 2.975891 0.493935 2.877083 0.397368 1.380211 -1.121146 1.612784 -0.903090 1.505150 -0.926571 3.217747 0.555747 0
6 dnaK 115 4.921624 1.762711 4.822005 1.618775 4.925830 1.564357 4.642825 1.520740 4.652493 1.332965 5.249165 2.117559 1
7 nhaA 172 3.623818 0.969811 3.533009 0.795880 3.015360 -0.212276 3.039811 0.005021 3.135451 -0.024675 4.024998 1.177100 0
8 rpsT 132 4.374583 1.885721 4.342205 1.840087 4.684401 2.194230 4.651273 2.171582 4.771216 2.320020 4.075273 1.683940 1
9 ileS 558 4.047664 0.705459 4.004063 0.602352 4.399579 0.973649 4.242777 0.994123 4.268601 0.927446 4.382755 1.030067 1
10 ileS 357 4.047664 0.823726 4.004063 0.722297 4.399579 1.111927 4.242777 1.113475 4.268601 1.045520 4.382755 1.187537 1
11 ileS 162 4.047664 1.092924 4.004063 0.983888 4.399579 1.395413 4.242777 1.388673 4.268601 1.318179 4.382755 1.485002 1
12 lspA 193 3.214446 0.871899 3.182985 0.808564 3.576341 1.153143 3.397505 1.084308 3.505828 1.125894 3.469085 1.145324 1
13 dapB 86 3.072434 0.211630 2.939769 0.014892 3.327461 0.186075 3.382737 0.343111 3.215770 0.226870 3.284769 0.382520 1
14 carA 100 3.066512 0.122216 3.012204 -0.026872 2.939769 -0.019997 2.683047 -0.207608 2.790637 -0.055517 3.277036 0.320146 1
15 caiF 82 2.871281 0.226065 2.666518 -0.016187 1.841985 -0.752446 1.911158 -0.783480 1.949390 -0.474481 3.349860 0.676694 0
16 caiE 128 2.716421 0.389720 2.601517 0.258278 1.822822 -0.851937 1.845098 -1.028029 1.894870 -1.028029 3.236411 0.832559 0
17 caiT 106 3.000000 -0.243550 2.804139 -0.396917 1.447158 -1.849215 1.525045 -1.724276 1.406540 -1.849215 3.257198 0.069863 0
18 yaaU 215 3.132900 0.431364 2.951095 0.206010 1.698970 -1.311249 1.883661 -1.101990 1.848189 -1.235528 3.497068 0.739259 0
19 kefF 166 2.877659 0.102111 2.708421 -0.041131 2.131939 -0.581619 2.285557 -0.384418 2.204120 -0.464233 3.149065 0.433587 0
20 apaG 428 3.241422 0.907318 3.180126 0.875940 3.258637 0.903280 3.209247 0.842481 3.263873 0.888712 3.421933 1.039405 1
21 rsmA 158 3.625878 0.943732 3.595276 0.908383 3.668712 1.031025 3.561876 0.993910 3.636137 1.091823 3.768638 1.085886 1
22 lptD 132 3.867379 0.995535 3.811474 0.893737 4.130028 1.122836 4.141246 1.258369 4.113308 1.230739 3.991293 1.040944 1
23 lptD 227 3.867379 0.788237 3.811474 0.685169 4.130028 0.916628 4.141246 1.042955 4.113308 1.019180 3.991293 0.839320 1
24 djlA 96 3.058995 0.250725 3.026125 0.112898 3.001517 0.246898 2.901458 0.152861 2.921946 0.200999 3.299507 0.396127 1
25 rluA 104 3.373096 0.911874 3.360972 0.894391 3.430398 1.014981 3.242293 0.749008 3.285557 0.894657 3.414973 0.988362 1
26 araC 146 3.018284 0.278910 2.873902 0.208559 2.419129 -0.242666 2.887898 0.177082 2.275311 -0.375478 3.206961 0.670386 0
27 thiQ 158 2.881955 0.456481 2.793790 0.409869 2.356026 0.069687 2.344392 0.041892 2.370143 0.113097 3.105169 0.654432 0
28 leuD 141 2.470557 -0.209700 2.419129 -0.248852 2.235528 -0.259917 2.105510 -0.459023 2.209515 -0.378367 2.764550 0.153977 1
29 leuL 316 2.181844 -0.149439 2.084576 -0.301030 2.288920 -0.431501 2.619093 -0.060354 2.359835 -0.275672 2.372912 0.076654 1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1657 yjiY 88 4.658188 1.594028 4.514016 1.358713 4.949766 1.630532 4.725209 1.560192 4.571528 1.195239 4.837756 1.731754 1
1658 tsr 245 4.325238 1.213707 4.211734 1.023882 2.461649 -0.703424 2.818226 -0.328468 2.542825 -0.579606 4.627064 1.384803 0
1659 yjjL 115 4.298569 1.382643 4.209729 1.209398 2.605844 -0.447914 2.739968 -0.200360 2.591621 -0.671532 4.866160 1.941058 0
1660 yjjL 122 4.298569 1.362810 4.209729 1.187104 2.605844 -0.473576 2.739968 -0.226022 2.591621 -0.679820 4.866160 1.917122 0
1661 yjjL 160 4.298569 1.333447 4.209729 1.160056 2.605844 -0.522879 2.739968 -0.274701 2.591621 -0.666301 4.866160 1.842785 0
1662 yjjM 96 4.277552 1.225498 4.113459 1.067334 2.343409 -0.602060 2.663701 -0.079181 2.263636 -0.739233 4.450780 1.386202 0
1663 yjjA 148 3.948780 1.469275 3.884654 1.387907 2.962132 0.276896 2.966611 0.257062 3.013890 0.334888 3.810904 1.378619 0
1664 dnaT 98 4.072673 1.816478 3.983942 1.737696 2.828338 0.491648 2.810569 0.509148 2.916717 0.630988 3.986144 1.708698 0
1665 dnaT 98 4.072673 1.816478 3.983942 1.737696 2.828338 0.491648 2.810569 0.509148 2.916717 0.630988 3.986144 1.708698 0
1666 yjjB 227 3.806858 1.384495 3.748769 1.401977 1.763428 -0.759429 1.897627 -0.539785 1.694605 -0.732777 3.665206 1.229435 0
1667 yjjB 227 3.806858 1.384495 3.748769 1.401977 1.763428 -0.759429 1.897627 -0.539785 1.694605 -0.732777 3.665206 1.229435 0
1668 yjjP 176 4.563665 1.782265 4.429203 1.607242 2.382917 -0.397324 2.546543 -0.267789 2.352183 -0.379225 4.377971 1.656436 0
1669 yjjQ 158 4.195651 1.407294 4.053002 1.253436 1.944483 -0.981173 2.146128 -0.767293 1.934498 -0.943385 4.156110 1.316886 0
1670 fhuF 89 4.646315 2.278317 4.550400 2.133718 3.503109 1.062603 3.383726 0.999756 3.355930 1.023969 4.594707 2.229443 0
1671 leuV 108 2.957368 2.124516 3.168055 2.485051 2.804480 2.422319 2.584896 2.441753 2.732796 2.585722 3.103462 2.116549 1
1672 rsmC 168 3.074816 0.637120 3.080085 0.636822 3.452706 0.892459 3.356217 0.878152 3.512217 1.073544 3.207904 0.685048 1
1673 osmY 243 3.722181 0.933812 3.635584 0.840607 2.414973 -0.354198 2.489255 -0.184209 2.482874 -0.165498 3.893318 1.032612 0
1674 deoC 644 3.610820 0.283133 3.367356 0.096370 3.513617 -0.182545 3.962227 0.343555 3.434489 -0.200895 3.819544 0.591479 0
1675 yjjJ 112 3.315656 0.732178 3.232742 0.783929 2.959041 1.033208 3.125969 1.138866 2.867467 1.163236 3.555215 0.952516 1
1676 lplA 121 3.049024 0.541497 3.032014 0.438353 2.863620 0.161492 2.683047 0.063343 2.596597 -0.031633 3.368752 0.766941 0
1677 yjjX 154 3.261739 0.805254 3.237418 0.719891 2.896526 0.615936 2.991226 0.818731 2.983852 0.730510 3.488762 1.026130 1
1678 creA 278 2.965437 0.569845 2.896526 0.457686 2.802432 0.571734 2.779596 0.582897 2.664172 0.573614 3.302980 0.837670 1
1679 arcA 202 3.695919 1.036281 3.680653 0.938926 4.071072 1.321963 4.022057 1.375437 4.048073 1.329378 4.256261 1.558863 1
1680 arcA 206 3.695919 1.029541 3.680653 0.932015 4.071072 1.314420 4.022057 1.367736 4.048073 1.321968 4.256261 1.551888 1
1681 arcA 229 3.695919 0.992347 3.680653 0.895558 4.071072 1.275246 4.022057 1.329740 4.048073 1.281936 4.256261 1.521635 1
1682 arcA 293 3.695919 0.916499 3.680653 0.823278 4.071072 1.194282 4.022057 1.253953 4.048073 1.196503 4.256261 1.486385 1
1683 arcA 375 3.695919 0.819982 3.680653 0.721646 4.071072 1.092534 4.022057 1.154241 4.048073 1.094960 4.256261 1.382305 1
1684 arcA 382 3.695919 0.812826 3.680653 0.713943 4.071072 1.084969 4.022057 1.146777 4.048073 1.087207 4.256261 1.374465 1
1685 arcA 495 3.695919 0.709972 3.680653 0.609023 4.071072 0.979613 4.022057 1.040754 4.048073 0.980072 4.256261 1.266412 1
1686 yjtD 104 2.469085 0.028290 2.356981 -0.017033 2.563481 0.212136 2.336460 0.150284 2.304275 0.032185 2.826399 0.369466 1

1687 rows × 15 columns


In [59]:
# Show the two UTR values used for clustering, plus the assigned label, for
# every rpoS row.
df.loc[df['gene'] == 'rpoS', ['utr_s9', 'utr_s9+bcm', 'label']]


Out[59]:
utr_s9 utr_s9+bcm label
985 0.454030 0.360994 1
986 0.397940 0.306022 1
987 0.353966 0.172502 1

In [27]:
# Write the labelled table as a tab-separated file (the index is written too,
# since `index` is left at its default).
df.to_csv(path_or_buf='../results/utr_bcm.csv', sep='\t')

In [57]:
# Rows assigned to cluster 1 whose `utr_s9` value is lower than `utr_s9+bcm`.
df.loc[df['label'].eq(1) & df['utr_s9'].lt(df['utr_s9+bcm'])]


Out[57]:
gene UTR_length orf_s17+bcm utr_s17+bcm orf_s9+bcm utr_s9+bcm orf_s19 utr_s19 orf_s17 utr_s17 orf_s9 utr_s9 orf_s19+bcm utr_s19+bcm label rho
6 dnaK 115 4.921624 1.762711 4.822005 1.618775 4.925830 1.564357 4.642825 1.520740 4.652493 1.332965 5.249165 2.117559 1
14 carA 100 3.066512 0.122216 3.012204 -0.026872 2.939769 -0.019997 2.683047 -0.207608 2.790637 -0.055517 3.277036 0.320146 1
28 leuD 141 2.470557 -0.209700 2.419129 -0.248852 2.235528 -0.259917 2.105510 -0.459023 2.209515 -0.378367 2.764550 0.153977 1
82 yafK 188 3.426674 0.647529 3.325310 0.552241 3.316704 0.449298 3.274620 0.471307 3.284769 0.519283 3.613419 0.794584 1
86 proB 172 3.563422 0.723513 3.508866 0.640978 3.440594 0.295950 3.470190 0.485044 3.476687 0.551932 3.721316 0.859292 1
87 proB 85 3.563422 0.983865 3.508866 0.905955 3.440594 0.601421 3.470190 0.790327 3.476687 0.855198 3.721316 1.121927 1
118 yahM 152 3.052694 1.153514 2.923503 1.028876 1.897627 0.596669 2.341435 0.930929 2.062582 0.812913 3.133539 1.092430 1
168 ybaB 367 3.483516 0.846280 3.408325 0.893291 3.492551 0.869184 3.346451 0.771093 3.329296 0.847458 3.512951 0.858744 1
169 htpG 303 4.694824 1.356020 4.623596 1.302032 4.552984 1.242751 4.307785 1.209196 4.315404 1.206265 5.011251 1.478862 1
173 ybaP 182 3.088136 0.462151 3.010724 0.280883 2.678973 0.143049 2.791691 0.514810 2.629410 0.175295 3.493458 0.703716 1
174 ybaP 184 3.088136 0.468781 3.010724 0.282341 2.678973 0.151823 2.791691 0.518013 2.629410 0.181564 3.493458 0.705529 1
175 ybaP 204 3.088136 0.478538 3.010724 0.278642 2.678973 0.160927 2.791691 0.532667 2.629410 0.183130 3.493458 0.707612 1
177 ybbN 211 3.731186 0.646993 3.636789 0.466355 3.697796 0.405287 3.543571 0.432734 3.504063 0.361011 4.135959 0.927356 1
178 ybbN 289 3.731186 0.587155 3.636789 0.444358 3.697796 0.356337 3.543571 0.386366 3.504063 0.340849 4.135959 0.860804 1
179 ybbO 180 3.161218 0.267823 3.118099 0.233278 3.016824 0.079181 3.007534 0.277482 3.004751 0.231866 3.342620 0.537819 1
249 kdpE 161 2.528274 0.044812 2.457125 -0.029289 2.243038 -0.215600 2.281033 -0.071693 2.310693 -0.159551 2.782831 0.380885 1
251 rhsC 204 2.878234 -0.496717 2.743902 -0.593627 2.033424 -0.977192 2.167317 -0.791116 2.019116 -0.811320 3.161967 -0.357807 1
254 ybgI 208 2.854610 0.148063 2.760045 -0.062791 2.981366 -0.035895 3.204527 0.422694 2.865104 -0.081274 3.225180 0.417935 1
259 gltA 202 3.747412 0.463287 3.667873 0.461433 4.225710 1.000645 3.981161 0.866083 3.638389 0.400085 4.200632 0.987901 1
260 gltA 299 3.747412 0.329830 3.667873 0.312851 4.225710 0.832359 3.981161 0.730615 3.638389 0.251056 4.200632 0.836929 1
263 sucA 120 4.199467 0.973513 4.177883 0.907590 4.408816 1.154323 4.365638 1.197510 4.140272 0.906021 4.303962 1.068495 1
264 sucA 120 4.199467 0.973513 4.177883 0.907590 4.408816 1.154323 4.365638 1.197510 4.140272 0.906021 4.303962 1.068495 1
314 ybjS 145 3.223496 0.257761 3.126294 0.058740 3.016407 -0.197580 2.993657 0.021902 3.046105 -0.100670 3.449941 0.408006 1
322 clpA 184 3.941710 0.393671 3.938545 0.382076 3.885220 0.379621 3.871076 0.562874 3.765892 0.311524 4.048364 0.622237 1
323 clpA 172 3.941710 0.408910 3.938545 0.402961 3.885220 0.399452 3.871076 0.585330 3.765892 0.331498 4.048364 0.644141 1
350 ycaI 82 3.037227 0.437402 2.916717 0.265163 2.305351 0.072958 2.376577 0.133461 2.331427 0.247554 3.232742 0.367220 1
359 pqiA 342 3.367076 0.345070 3.327052 0.299758 3.070776 0.268063 2.994537 0.243038 3.064458 0.275197 3.328889 0.329297 1
360 pqiA 335 3.367076 0.351750 3.327052 0.305864 3.070776 0.269792 2.994537 0.247644 3.064458 0.277044 3.328889 0.332288 1
369 hyaA 155 2.008600 -0.588272 1.860338 -0.959883 1.041393 -1.260913 1.290035 -0.986212 1.146128 -1.315270 2.429752 -0.148939 1
398 ymdA 189 2.208173 -0.183040 2.115611 -0.251156 1.662758 -0.633009 1.763428 -0.509306 1.851258 -0.395648 2.429752 0.009096 1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
1455 aspT 92 2.080987 2.193412 1.908485 2.152438 2.102091 2.218912 2.095169 2.348939 1.774517 2.135737 1.648360 1.588333 1
1456 trpT 177 2.982723 1.986671 3.301464 1.976366 3.045519 2.019210 3.016824 2.134449 3.193542 1.953260 2.525045 1.417092 1
1469 cyaA 363 4.078294 0.622651 4.041866 0.518550 3.921400 0.553201 3.809795 0.584668 3.842297 0.494515 4.380528 0.915183 1
1470 cyaA 154 4.078294 0.904146 4.041866 0.812696 3.921400 0.876188 3.809795 0.861697 3.842297 0.807676 4.380528 1.227202 1
1471 corA 232 3.975822 1.312028 3.927345 1.220367 3.377033 0.853179 3.459242 0.939648 3.563837 1.058004 4.091614 1.390730 1
1480 rfaH 144 3.217484 0.685182 3.167908 0.570802 3.066699 0.483607 2.968483 0.548782 3.113442 0.542341 3.309949 0.881844 1
1484 mobA 122 2.798996 0.373033 2.723866 0.291129 2.100371 0.124494 2.183270 0.338522 1.937016 0.255075 2.701568 0.354549 1
1504 yiiG 121 3.087781 0.091856 2.923503 -0.171628 1.748188 -0.503002 1.944483 -0.564271 1.785330 -0.237687 3.437195 0.266492 1
1517 yijO 112 2.997168 0.487340 2.783546 0.392477 2.115611 -0.005856 2.403121 0.281196 2.089905 0.267800 3.219715 0.645826 1
1521 btuB 240 3.530136 0.871184 3.412293 0.802916 3.465160 0.797325 3.428783 0.921036 3.346255 0.783099 3.876737 1.218251 1
1527 secE 181 4.181400 1.854256 4.215307 1.893231 4.092966 1.663748 3.955760 1.633887 4.079272 1.761479 3.927422 1.498767 1
1530 rpoB 182 5.252414 1.969546 5.252573 1.966902 5.080502 1.830081 5.092110 1.891513 5.154594 1.936671 5.119121 1.775158 1
1533 rsd 146 3.142546 0.382806 3.013048 0.189756 2.868056 0.143143 3.267875 0.410835 2.828338 0.094524 3.347037 0.600570 1
1577 proP 182 3.804889 0.884347 4.057514 1.247784 3.946157 1.201727 3.376212 0.682681 3.766859 1.074583 4.296347 1.469012 1
1578 proP 95 3.804889 1.160105 4.057514 1.527766 3.946157 1.482874 3.376212 0.963042 3.766859 1.356026 4.296347 1.749126 1
1588 groL 242 5.011257 1.871252 4.935107 1.777672 5.027376 1.877876 4.854731 1.783576 4.851475 1.660842 5.294230 2.171352 1
1589 groL 152 5.011257 1.932116 4.935107 1.842724 5.027376 1.966281 4.854731 1.848999 4.851475 1.733187 5.294230 2.265221 1
1604 nsrR 146 3.286793 1.099874 3.154728 1.024154 2.598791 0.746005 2.644439 0.857663 2.516535 0.882727 3.530136 1.416971 1
1610 ytfB 102 3.101231 0.465616 3.049412 0.398791 2.948168 0.349335 2.934751 0.362468 2.885926 0.301030 3.294687 0.649888 1
1611 cycA 84 3.626340 0.898542 3.591343 0.823909 3.818787 1.008702 3.357935 0.815688 3.207769 0.617923 3.911104 1.210694 1
1615 msrA 85 3.024280 0.659972 2.915927 0.502748 2.652730 0.227433 2.902818 0.696921 2.677607 0.352750 3.165541 0.674807 1
1619 yjgA 181 3.427405 0.825824 3.362482 0.573871 3.516866 0.422657 3.470337 0.480309 3.518646 0.503873 3.440752 0.642689 1
1620 yjgA 200 3.427405 0.803969 3.362482 0.554186 3.516866 0.404834 3.470337 0.454464 3.518646 0.478566 3.440752 0.627878 1
1621 yjgA 216 3.427405 0.787762 3.362482 0.538285 3.516866 0.388180 3.470337 0.428600 3.518646 0.453361 3.440752 0.612244 1
1627 yjgN 164 2.893762 0.159904 2.650308 -0.076541 1.267172 -0.752446 1.484300 -0.277828 1.332438 -0.225839 3.123852 0.340251 1
1628 yjgM 134 3.071698 0.232731 2.975891 0.040213 2.619615 -0.415298 2.690196 -0.049737 2.821514 -0.044319 3.258158 0.417583 1
1638 fecE 182 3.151523 0.503731 3.066885 0.394105 3.043362 0.304003 2.852175 0.292597 2.936011 0.313380 3.314183 0.574985 1
1639 fecE 187 3.151523 0.501579 3.066885 0.396078 3.043362 0.301610 2.852175 0.299285 2.936011 0.318666 3.314183 0.569831 1
1655 yjiA 130 3.834134 1.327752 3.670014 1.223915 3.725952 1.190332 3.619146 1.239877 3.392785 0.954614 4.012753 1.519070 1
1657 yjiY 88 4.658188 1.594028 4.514016 1.358713 4.949766 1.630532 4.725209 1.560192 4.571528 1.195239 4.837756 1.731754 1

194 rows × 16 columns


In [63]:
# Sample titles with corresponding barcodes.
# Each sample maps to a list of two barcodes; the per-barcode `utr_<barcode>`
# columns of `dfm` are looked up through these values further down.
# Per the notes at the top of the notebook: s9 is WT, s9+bcm is WT +BCM and
# s17 is the triple sRNA mutant.
# NOTE(review): s19 and the s17/s19 "+bcm" entries are not described in the
# visible notes -- presumably the matching +BCM samples; confirm.
samples = {
    's9': ['ATCACG', 'ACAGTG'],
    's9+bcm': ['CGATGT', 'GCCAAT'],
    's17': ['TTAGGC', 'GATCAG'],
    's17+bcm': ['TGACCA', 'TAGCTT'],
    's19': ['CAGATC','GGCTAC'],
    's19+bcm': ['ACTTGA', 'CTTGTA']
}

# Per-barcode UTR columns for rpoS, restricted to the s9 and s9+bcm barcodes.
dfm.loc[
    dfm['gene'] == 'rpoS',
    ['gene', 'UTR_length', 'utr_ATCACG', 'utr_ACAGTG', 'utr_CGATGT', 'utr_GCCAAT']
]


Out[63]:
gene UTR_length utr_ATCACG utr_ACAGTG utr_CGATGT utr_GCCAAT
985 rpoS 103 207.0 379.0 170.0 303.0
986 rpoS 173 376.0 489.0 221.0 479.0
987 rpoS 567 971.0 1591.0 708.0 979.0

In [64]:
# rpoS rows with the per-barcode UTR columns relabelled by sample and
# replicate number. Both the boolean selection and `rename` return new
# objects, so `dfm` itself is left untouched.
rpoS = dfm.loc[
    dfm['gene'] == 'rpoS',
    ['gene', 'UTR_length', 'utr_ATCACG', 'utr_ACAGTG', 'utr_CGATGT', 'utr_GCCAAT']
].rename(columns={
    'utr_ATCACG': 'utr_s9R1',
    'utr_ACAGTG': 'utr_s9R2',
    'utr_CGATGT': 'utr_s9+bcmR1',
    'utr_GCCAAT': 'utr_s9+bcmR2'
})
rpoS


Out[64]:
gene UTR_length utr_s9R1 utr_s9R2 utr_s9+bcmR1 utr_s9+bcmR2
985 rpoS 103 207.0 379.0 170.0 303.0
986 rpoS 173 376.0 489.0 221.0 479.0
987 rpoS 567 971.0 1591.0 708.0 979.0

In [70]:
# Columns shared by every per-sample frame.
drep = dfm[['gene', 'UTR_length']]

# Build one frame per sample: the shared columns, one log10-transformed
# `utr_<barcode>` column per barcode (named R1, R2, ... in list order), and a
# `sample` column holding the sample title.
res = []
for sample_name, sample_barcodes in samples.items():
    replicate_cols = {
        'R{}'.format(rep): np.log10(dfm['utr_{}'.format(bc)])
        for rep, bc in enumerate(sample_barcodes, start=1)
    }
    # Two separate `assign` calls keep `sample` as the last column.
    res.append(drep.assign(**replicate_cols).assign(sample=sample_name))

# Stack the per-sample frames; each keeps the original row index of `dfm`.
long = pd.concat(res)
long


Out[70]:
gene UTR_length R1 R2 sample
0 yaaX 204 2.642465 2.649335 s17+bcm
1 yaaA 128 2.257679 2.448706 s17+bcm
2 yaaA 156 2.281033 2.481443 s17+bcm
3 yaaW 186 2.753583 3.023664 s17+bcm
4 yaaI 127 2.589950 2.600973 s17+bcm
5 yaaI 152 2.641474 2.707570 s17+bcm
6 dnaK 115 3.802774 3.843108 s17+bcm
7 nhaA 172 3.114277 3.280578 s17+bcm
8 rpsT 132 4.037904 3.972203 s17+bcm
9 ileS 558 3.399501 3.498999 s17+bcm
10 ileS 357 3.310268 3.433770 s17+bcm
11 ileS 162 3.234264 3.361350 s17+bcm
12 lspA 193 3.072617 3.228400 s17+bcm
13 dapB 86 2.152288 2.139879 s17+bcm
14 carA 100 2.093422 2.149219 s17+bcm
15 caiF 82 2.008600 2.240549 s17+bcm
16 caiE 128 2.305351 2.629410 s17+bcm
17 caiT 106 1.518514 1.944483 s17+bcm
18 yaaU 215 2.474216 2.936011 s17+bcm
19 kefF 166 2.008600 2.502427 s17+bcm
20 apaG 428 3.481729 3.589167 s17+bcm
21 rsmA 158 2.992111 3.253822 s17+bcm
22 lptD 132 3.014940 3.198107 s17+bcm
23 lptD 227 3.054996 3.218273 s17+bcm
24 djlA 96 2.201397 2.262451 s17+bcm
25 rluA 104 2.945469 2.911690 s17+bcm
26 araC 146 2.305351 2.547775 s17+bcm
27 thiQ 158 2.552668 2.737987 s17+bcm
28 leuD 141 1.690196 2.096910 s17+bcm
29 leuL 316 2.255273 2.428135 s17+bcm
... ... ... ... ... ...
1657 yjiY 88 3.460296 3.819807 s19+bcm
1658 tsr 245 3.765669 3.782114 s19+bcm
1659 yjjL 115 3.995591 4.007833 s19+bcm
1660 yjjL 122 3.997430 4.009451 s19+bcm
1661 yjjL 160 4.044618 4.049179 s19+bcm
1662 yjjM 96 3.319314 3.412629 s19+bcm
1663 yjjA 148 3.470851 3.615003 s19+bcm
1664 dnaT 98 3.688509 3.711048 s19+bcm
1665 dnaT 98 3.688509 3.711048 s19+bcm
1666 yjjB 227 3.507451 3.651569 s19+bcm
1667 yjjB 227 3.507451 3.651569 s19+bcm
1668 yjjP 176 3.868468 3.933031 s19+bcm
1669 yjjQ 158 3.509471 3.521530 s19+bcm
1670 fhuF 89 4.195927 4.161038 s19+bcm
1671 leuV 108 4.081419 4.209166 s19+bcm
1672 rsmC 168 2.994757 2.805501 s19+bcm
1673 osmY 243 3.466719 3.363612 s19+bcm
1674 deoC 644 3.401745 3.398981 s19+bcm
1675 yjjJ 112 2.984527 3.018284 s19+bcm
1676 lplA 121 2.833784 2.865104 s19+bcm
1677 yjjX 154 3.219323 3.207904 s19+bcm
1678 creA 278 3.267406 3.295567 s19+bcm
1679 arcA 202 3.962985 3.736157 s19+bcm
1680 arcA 206 3.964401 3.737908 s19+bcm
1681 arcA 229 3.980367 3.753200 s19+bcm
1682 arcA 293 4.050534 3.827692 s19+bcm
1683 arcA 375 4.053002 3.831806 s19+bcm
1684 arcA 382 4.053194 3.831998 s19+bcm
1685 arcA 495 4.056371 3.838660 s19+bcm
1686 yjtD 104 2.290035 2.465383 s19+bcm

10122 rows × 5 columns


In [73]:
# Replicate 1 vs replicate 2 scatter, one panel per sample, with the y = x
# line drawn as a reference.
p = (
    ggplot(long, aes(x='R1', y='R2'))
    + geom_point(size=2.5, alpha=0.1)
    + geom_abline(slope=1, intercept=0, size=.5, alpha=0.4, color='#586e75')
    + facet_wrap('sample')
)
print(p)


<ggplot: (8783501883256)>

In [ ]: