In [1]:
    
%matplotlib inline
    
In [2]:
    
import pandas as pd
import numpy as np
import os
from ggplot import *
    
In [3]:
    
# Sample titles with corresponding barcodes
samples = {
    's9': ['ATCACG', 'ACAGTG'],
    's9+bcm': ['CGATGT', 'GCCAAT'],
    's17': ['TTAGGC', 'GATCAG'],
    's17+bcm': ['TGACCA', 'TAGCTT'],
    's19': ['CAGATC','GGCTAC'],
    's19+bcm': ['ACTTGA', 'CTTGTA']
}
# Barcodes
barcodes = ['ATCACG', 'ACAGTG', 'CGATGT', 'GCCAAT', 'TTAGGC', 'GATCAG', 'TGACCA', 'TAGCTT', 'CAGATC','GGCTAC', 'ACTTGA', 'CTTGTA']
    
Gene counts were obtained using htseq program against the standard NC_000913 .GFF file The was I calculate reads in UTRs here is not strand-specific. So the numbers can be confounded if there is a transcript going in the opposite direction. We can solve this later if needed.
In [4]:
    
dfm = pd.DataFrame.from_csv('../data/dfm.csv', sep='\t')
dfm
    
    Out[4]:
  
    
       
      TSS 
      TU_name 
      coord_3 
      coord_5 
      first_gene_3 
      first_gene_5 
      gene 
      operon 
      promoter 
      seq_5UTR 
      ... 
      utr_CGATGT 
      utr_GCCAAT 
      utr_TTAGGC 
      utr_GATCAG 
      utr_TGACCA 
      utr_TAGCTT 
      utr_CAGATC 
      utr_GGCTAC 
      utr_ACTTGA 
      utr_CTTGTA 
     
  
  
    
      0 
      5030 
      yaaX 
      5234 
      5030 
      5530 
      5234 
      yaaX 
      yaaX 
      yaaXp4 
      ATTATCTCAATCAGGCCGGGTTTGCTTTTATGCAGCCCGGCTTTTT... 
      ... 
      351 
      370 
      298 
      448 
      439 
      446 
      137 
      235 
      479 
      450 
     
    
      1 
      6587 
      yaaA 
      6587 
      6459 
      6459 
      5683 
      yaaA 
      yaaA 
      yaaAp3 
      ATCCGGATATCGGTCGCCAGCTTTCTCCGGACGCGTGGGATGATGT... 
      ... 
      163 
      225 
      175 
      315 
      181 
      281 
      206 
      114 
      388 
      305 
     
    
      2 
      6615 
      yaaA 
      6615 
      6459 
      6459 
      5683 
      yaaA 
      yaaA 
      yaaAp5 
      GTGCGCCCGGTGTTTGATCCATTGCGTTATCCGGATATCGGTCGCC... 
      ... 
      172 
      236 
      175 
      322 
      191 
      303 
      207 
      116 
      411 
      329 
     
    
      3 
      11542 
      yaaW 
      11542 
      11356 
      11356 
      10643 
      yaaW 
      yaaW 
      yaaWp3 
      GCCTGAATATTCCTTCAGAAATAAAAGAAGGGCAAACCACTGACTG... 
      ... 
      504 
      776 
      12 
      36 
      567 
      1056 
      12 
      24 
      1505 
      1546 
     
    
      4 
      11913 
      yaaI 
      11913 
      11786 
      11786 
      11382 
      yaaI 
      yaaI 
      yaaIp3 
      ATCGCAGCCAAGGCATTCATCAAAAAATTGTAATAAAAAGAAAAGA... 
      ... 
      304 
      356 
      14 
      23 
      389 
      399 
      8 
      13 
      458 
      440 
     
    
      5 
      11938 
      yaaI 
      11938 
      11786 
      11786 
      11382 
      yaaI 
      yaaI 
      yaaIp2 
      GCAATTTTATTCATATAAAGAATGAATCGCAGCCAAGGCATTCATC... 
      ... 
      340 
      419 
      14 
      24 
      438 
      510 
      8 
      15 
      565 
      528 
     
    
      6 
      12048 
      dnaK-tpke11-dnaJ 
      12163 
      12048 
      14079 
      12163 
      dnaK 
      dnaK-tpke11-dnaJ 
      dnaKp1 
      AACCGCAGTGAGTGAGTCTGCAAAAAAATGAAATTGGGCAGTTGAA... 
      ... 
      2977 
      6584 
      4799 
      2830 
      6350 
      6968 
      2336 
      6099 
      15976 
      14174 
     
    
      7 
      17317 
      nhaAR 
      17489 
      17317 
      18655 
      17489 
      nhaA 
      nhaAR 
      nhaAp2 
      GGTCACTCGTGAGCGCTTACAGCCGTCAAAAACGCATCTCACCGCT... 
      ... 
      792 
      1358 
      133 
      215 
      1301 
      1908 
      74 
      137 
      2949 
      2223 
     
    
      8 
      21210 
      rpsT 
      21210 
      21078 
      21078 
      20815 
      rpsT 
      rpsT 
      rpsTp1 
      GCCATCACTACGTAACGAGTGCCGGCACATTAACGGCGCTTATTTG... 
      ... 
      9302 
      8966 
      14726 
      24465 
      10912 
      9380 
      23735 
      17554 
      7045 
      5706 
     
    
      9 
      21833 
      ileS-lspA-fkpB-ispH 
      22391 
      21833 
      25207 
      22391 
      ileS 
      ribF-ileS-lspA-fkpB-ispH 
      ileSp1 
      GCTGGCATGGAATACGGCTTCGATATCACCAGTACGCAAACTTTTT... 
      ... 
      1739 
      2728 
      5454 
      5556 
      2509 
      3155 
      4785 
      5718 
      6382 
      5578 
     
    
      10 
      22034 
      ileS-lspA-fkpB-ispH 
      22391 
      22034 
      25207 
      22391 
      ileS 
      ribF-ileS-lspA-fkpB-ispH 
      ileSp2 
      GCGAATGTACCGCTGCGCCGTCAGGTTTCCCCGGTGAAAGGGGTTT... 
      ... 
      1369 
      2398 
      4524 
      4748 
      2043 
      2715 
      4188 
      5051 
      5825 
      5171 
     
    
      11 
      22229 
      ileS-lspA-fkpB-ispH 
      22391 
      22229 
      25207 
      22391 
      ileS 
      ribF-ileS-lspA-fkpB-ispH 
      ileSp3 
      GTGCTGCGTAAAAAAATACGCAATGAGCAGCGATTTGCGTCGCTGG... 
      ... 
      1043 
      2079 
      3899 
      4030 
      1715 
      2298 
      3665 
      4388 
      5149 
      4749 
     
    
      12 
      25014 
      lspA-fkpB-ispH 
      25207 
      25014 
      25701 
      25207 
      lspA 
      ribF-ileS-lspA-fkpB-ispH 
      lspAp 
      ACGCACCTGCTGATGCTCAGCAGAGCGAAGTACTCAAAGGGCTGAA... 
      ... 
      1189 
      1295 
      1949 
      2738 
      1182 
      1692 
      2208 
      3284 
      2990 
      2404 
     
    
      13 
      28288 
      dapB 
      28374 
      28288 
      29195 
      28374 
      dapB 
      dapB 
      dapBp2 
      TAATTATCAGCGTTTTTGGCTGGCGGCGTAGCGATGCGCTGGTTAC... 
      ... 
      75 
      103 
      122 
      257 
      142 
      138 
      133 
      131 
      194 
      221 
     
    
      14 
      29551 
      carAB 
      29651 
      29551 
      30799 
      29651 
      carA 
      carAB 
      carAp1 
      GTTTGCCAGAAATTCGTCGGTAAGCAGATTTGCATTGATTTACGTC... 
      ... 
      72 
      116 
      61 
      63 
      124 
      141 
      130 
      61 
      206 
      212 
     
    
      15 
      34218 
      caiF 
      34300 
      34218 
      34695 
      34300 
      caiF 
      caiF 
      caiFp 
      ATCCACAATTTTAATATGGCCTTGTTTAATTGCTTCAAAACGAGTC... 
      ... 
      39 
      119 
      10 
      17 
      102 
      174 
      15 
      14 
      441 
      338 
     
    
      16 
      35499 
      caiE 
      35499 
      35371 
      35371 
      34781 
      caiE 
      caiTABCDE 
      caiEp3 
      GCGTATCGCTATATTCGCAGCGGCGTGTTGAAACACTATCCATCGG... 
      ... 
      204 
      260 
      5 
      19 
      202 
      426 
      17 
      19 
      908 
      833 
     
    
      17 
      42037 
      caiTABCDE 
      42037 
      41931 
      41931 
      40417 
      caiT 
      caiTABCDE 
      caiTp 
      GCCATTAACGCGTCCACGAGGTTAATAATAATTATATTAAATGTTA... 
      ... 
      22 
      63 
      1 
      3 
      33 
      88 
      1 
      2 
      105 
      144 
     
    
      18 
      45592 
      yaaU 
      45807 
      45592 
      47138 
      45807 
      yaaU 
      yaaU 
      yaaUp 
      CCCGCAGGTCTGTACAAGAAGCAGGATGACGGCAGTGTGCGCTTCG... 
      ... 
      320 
      371 
      5 
      29 
      298 
      863 
      2 
      19 
      1138 
      1221 
     
    
      19 
      47080 
      kefFC 
      47246 
      47080 
      47776 
      47246 
      kefF 
      kefFC 
      kefFp2 
      GAGGGATGTCACTGGCGCAGACCAGCAATATGACGATCCGCGGGCA... 
      ... 
      114 
      188 
      36 
      101 
      102 
      318 
      40 
      47 
      481 
      420 
     
    
      20 
      52034 
      apaGH 
      52034 
      51606 
      51606 
      51229 
      apaG 
      lptD-surA-pdxA-rsmA-apaGH 
      apaGp 
      ATTGCCGACATGCACTTTATGTTGCAAAAAGAGGTGGTGAATCGTC... 
      ... 
      3292 
      3141 
      2270 
      3686 
      3032 
      3883 
      3699 
      3152 
      4924 
      4449 
     
    
      21 
      52588 
      rsmA-apaGH 
      52588 
      52430 
      52430 
      51609 
      rsmA 
      lptD-surA-pdxA-rsmA-apaGH 
      rsmAp 
      GGCTTCGGGCGCGGTGTGAACATTACGCTGGGCCTGCCCTTTATTC... 
      ... 
      1129 
      1430 
      905 
      2211 
      982 
      1794 
      1888 
      1506 
      2132 
      1719 
     
    
      22 
      57241 
      lptD 
      57241 
      57109 
      57109 
      54755 
      lptD 
      lptD-surA-pdxA-rsmA-apaGH 
      impp2 
      ATTCAATGCCGTGCCTAACACCACTGACGCTATTCGGACAGGATTA... 
      ... 
      996 
      1071 
      2016 
      2770 
      1035 
      1578 
      2130 
      1373 
      1517 
      1384 
     
    
      23 
      57336 
      lptD-surA-pdxA 
      57336 
      57109 
      57109 
      54755 
      lptD 
      lptD-surA-pdxA-rsmA-apaGH 
      impp3 
      CAGGGCTATCTCCCACAATATAAAGGTGCTTTTACCGTTTTCCGGC... 
      ... 
      1047 
      1152 
      2130 
      2882 
      1135 
      1653 
      2279 
      1468 
      1641 
      1495 
     
    
      24 
      57268 
      djlA 
      57364 
      57268 
      58179 
      57364 
      djlA 
      djlA 
      djlAp3 
      GCGATGTCCCACAATTGACCGCAGCCGGAAAACGGTAAAAGCACCT... 
      ... 
      103 
      146 
      134 
      139 
      159 
      183 
      205 
      134 
      231 
      247 
     
    
      25 
      60450 
      rluA 
      60450 
      60346 
      60346 
      59687 
      rluA 
      hepA-rluA 
      rluAp1 
      ATTGAGAGCAACCGTCAGCAGGTAATGGAAAGCCTGGATCAGGCAG... 
      ... 
      884 
      747 
      544 
      623 
      882 
      816 
      1444 
      709 
      1012 
      1013 
     
    
      26 
      70241 
      araC 
      70387 
      70241 
      71265 
      70387 
      araC 
      araC 
      araCp 
      TGGTCCCGCTTTGTTACAGAATGCTTTTAATAAGCGGGGTTACCGG... 
      ... 
      177 
      295 
      252 
      187 
      202 
      353 
      53 
      114 
      614 
      753 
     
    
      27 
      73085 
      thiQ 
      73085 
      72927 
      72927 
      72229 
      thiQ 
      sgrR-sroA-tbpA-thiPQ 
      thiQp2 
      GGTGGCGTTGTTCGGTAACGATGATTTCCGCACCCTGCCGTTTTAT... 
      ... 
      349 
      463 
      114 
      234 
      357 
      547 
      173 
      198 
      698 
      728 
     
    
      28 
      79594 
      leuD 
      79594 
      79453 
      79453 
      78848 
      leuD 
      leuLABCD 
      leuDp4 
      GTGCCTCCACCAGCAACCGTAACTTTGAAGGCCGCCAGGGGCGCGG... 
      ... 
      71 
      88 
      31 
      67 
      49 
      125 
      62 
      93 
      221 
      181 
     
    
      29 
      84024 
      leuLABCD 
      84024 
      83708 
      83708 
      83622 
      leuL 
      leuLABCD 
      leuLp2 
      AAATCAATATCAAAAAAAATCGCAAAACATATAATTCAATACAAAT... 
      ... 
      94 
      222 
      256 
      294 
      180 
      268 
      79 
      155 
      420 
      334 
     
    
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
     
    
      1657 
      4589390 
      yjiY 
      4589390 
      4589302 
      4589302 
      4587152 
      yjiY 
      yjiY 
      yjiYp 
      AGGATAGCGGTCAATTTACCTCCTCAAACGCAACGCAAACCTAGAA... 
      ... 
      1240 
      5424 
      2950 
      7589 
      1527 
      8506 
      324 
      18591 
      3646 
      12952 
     
    
      1658 
      4589435 
      tsr 
      4589680 
      4589435 
      4591335 
      4589680 
      tsr 
      tsr 
      tsrp1 
      AACTGAGTGGTTATTTTAGGGATGTAAGCGGTCAGTTTTGCGGTTG... 
      ... 
      1981 
      6099 
      4244 
      7442 
      2310 
      7596 
      380 
      19107 
      3708 
      13198 
     
    
      1659 
      4592860 
      yjjL 
      4592860 
      4592745 
      4592745 
      4591384 
      yjjL 
      yjjL 
      yjjLp7 
      AAAAATGTTTTTGTGAGCGGTAGTAAAGTCCTAAAACTTTAACCTG... 
      ... 
      2960 
      2760 
      34 
      143 
      2371 
      4532 
      11 
      69 
      6694 
      6507 
     
    
      1660 
      4592867 
      yjjL 
      4592867 
      4592745 
      4592745 
      4591384 
      yjjL 
      yjjL 
      yjjLp6 
      GTCGACTAAAAATGTTTTTGTGAGCGGTAGTAAAGTCCTAAAACTT... 
      ... 
      3095 
      2914 
      34 
      147 
      2542 
      4692 
      11 
      72 
      7034 
      6783 
     
    
      1661 
      4592905 
      yjjL 
      4592905 
      4592745 
      4592745 
      4591384 
      yjjL 
      yjjL 
      yjjLp8 
      AATACCGGGAAATTCCCGCTTACCTATGCTCACAATCAGTCGACTA... 
      ... 
      4063 
      3850 
      40 
      173 
      3666 
      5851 
      12 
      85 
      8908 
      8482 
     
    
      1662 
      4593970 
      yjjM 
      4593970 
      4593874 
      4593874 
      4592960 
      yjjM 
      yjjM 
      yjjMp4 
      ATGAAAAATGGCAGCGGAACGGAAAATCTTTTTTGTGAAAACACAC... 
      ... 
      2083 
      1977 
      32 
      105 
      2088 
      3125 
      46 
      117 
      7863 
      10366 
     
    
      1663 
      4598360 
      yjjA 
      4598360 
      4598212 
      4598212 
      4597718 
      yjjA 
      yjjB-dnaTC-yjjA 
      yjjAp1 
      GGGCGAACGCGTGATGGACCGTATGCGCCTGGGTAACAGTTTGTGG... 
      ... 
      1336 
      1759 
      292 
      532 
      1931 
      2841 
      436 
      434 
      1776 
      1739 
     
    
      1664 
      4599638 
      dnaTC-yjjA 
      4599638 
      4599540 
      4599540 
      4599001 
      dnaT 
      yjjB-dnaTC-yjjA 
      dnaTp 
      CGCTTAAGCACACGGATGAGAGACAGCCTCCTCTCCTCCGTGTGTT... 
      ... 
      2809 
      2471 
      35 
      66 
      2550 
      3307 
      30 
      40 
      2253 
      2253 
     
    
      1665 
      4599638 
      dnaTC 
      4599638 
      4599540 
      4599540 
      4599001 
      dnaT 
      yjjB-dnaTC-yjjA 
      dnaTp 
      CGCTTAAGCACACGGATGAGAGACAGCCTCCTCTCCTCCGTGTGTT... 
      ... 
      2809 
      2471 
      35 
      66 
      2550 
      3307 
      30 
      40 
      2253 
      2253 
     
    
      1666 
      4600200 
      yjjB-dnaTC-yjjA 
      4600200 
      4599973 
      4599973 
      4599647 
      yjjB 
      yjjB-dnaTC-yjjA 
      yjjBp 
      CGCTGGGCGATCGCCAGTCTGCTGACACTGGCTACCTGCGTCGGCG... 
      ... 
      3408 
      2771 
      301 
      393 
      3686 
      4104 
      332 
      321 
      2541 
      2726 
     
    
      1667 
      4600200 
      yjjB-dnaTC 
      4600200 
      4599973 
      4599973 
      4599647 
      yjjB 
      yjjB-dnaTC-yjjA 
      yjjBp 
      CGCTGGGCGATCGCCAGTCTGCTGACACTGGCTACCTGCGTCGGCG... 
      ... 
      3408 
      2771 
      301 
      393 
      3686 
      4104 
      332 
      321 
      2541 
      2726 
     
    
      1668 
      4601057 
      yjjP 
      4601057 
      4600881 
      4600881 
      4600111 
      yjjP 
      yjjP 
      yjjPp 
      GATAGTTTGTTTGCGGCGAGAGATAATTCGCTTTTTATCACCGAGC... 
      ... 
      4422 
      4996 
      177 
      362 
      4721 
      6884 
      320 
      299 
      4190 
      6384 
     
    
      1669 
      4601342 
      yjjQ-bglJ 
      4601500 
      4601342 
      4602225 
      4601500 
      yjjQ 
      yjjQ-bglJ 
      yjjQp 
      ACATGCAGTGGAGTTGTTGTGCAGCAGGAGTATGCTGATATGAAAG... 
      ... 
      5614 
      6083 
      240 
      529 
      5453 
      9098 
      349 
      503 
      5119 
      5931 
     
    
      1670 
      4603775 
      fhuF 
      4603775 
      4603686 
      4603686 
      4602898 
      fhuF 
      fhuF 
      fhuFp2 
      ATCATTTGCAAGCCAGATAAATCCCTTGCTATCGGGTAAACCTATC... 
      ... 
      3901 
      5340 
      37 
      95 
      6301 
      7406 
      12 
      71 
      5391 
      6479 
     
    
      1671 
      4604296 
      leuV 
      4604296 
      4604188 
      4604188 
      4604102 
      leuV 
      leuQPV 
      leuVp2 
      AATTGGTAGACGCGCTAGCTTCAGGTGTTAGTGTTCTTACGGACGT... 
      ... 
      8529 
      8317 
      24 
      145 
      7494 
      11600 
      23 
      123 
      9619 
      13085 
     
    
      1672 
      4605891 
      rsmC 
      4605891 
      4605723 
      4605723 
      4604692 
      rsmC 
      rsmC 
      rsmCp1 
      AGGGCGACGCAGCGACCACTGGGTAATGCCCAGTTGCTGTAACTGC... 
      ... 
      23971 
      25280 
      131 
      444 
      28109 
      37675 
      120 
      298 
      31832 
      30399 
     
    
      1673 
      4609176 
      osmY 
      4609419 
      4609176 
      4610024 
      4609419 
      osmY 
      osmY 
      osmYp 
      GTGATGACATTTCTGACGGCGTTAAATACCGTTCAATGCGTAGATA... 
      ... 
      1131 
      1066 
      711 
      2113 
      957 
      1281 
      1854 
      1150 
      1343 
      1131 
     
    
      1674 
      4614702 
      deoCABD 
      4615346 
      4614702 
      4616125 
      4615346 
      deoC 
      deoCABD 
      deoCp1 
      ATACGGTTGCAACAACGCATCCAGTTGCCCCAGGTAGACCGGCATC... 
      ... 
      749 
      744 
      84 
      108 
      653 
      1067 
      33 
      62 
      3473 
      2084 
     
    
      1675 
      4619680 
      yjjJ 
      4619792 
      4619680 
      4621123 
      4619792 
      yjjJ 
      yjjJ 
      yjjJp2 
      GGCTTTTTAGTATCTATTCATTTTTCTCTCCAGCTTGAATATTTTC... 
      ... 
      1477 
      1516 
      3697 
      5754 
      1213 
      2070 
      2906 
      5983 
      2706 
      3402 
     
    
      1676 
      4622261 
      lplA 
      4622261 
      4622140 
      4622140 
      4621124 
      lplA 
      ytjB-lplA 
      lplAp2 
      ACAGGGTAAACGCACCCGCTGGCAGCAATCGCCCTTCCTGTTAACC... 
      ... 
      304 
      318 
      255 
      159 
      381 
      409 
      109 
      73 
      695 
      685 
     
    
      1677 
      4631922 
      yjjX 
      4631922 
      4631768 
      4631768 
      4631256 
      yjjX 
      yjjX 
      yjjXp6 
      GTTGCTCACCTTTGGCGGTCAGCGGGCTGTCAGACTGGCCCTGAAT... 
      ... 
      472 
      440 
      125 
      85 
      393 
      691 
      55 
      102 
      1072 
      1030 
     
    
      1678 
      4633266 
      creABC 
      4633544 
      4633266 
      4634017 
      4633544 
      creA 
      creABCD 
      creAp2 
      GACAGGGGCTGATCCAGATGACCTTCCAGCCAGATTAAAAGGTCGC... 
      ... 
      1276 
      1070 
      344 
      421 
      940 
      1328 
      277 
      297 
      1849 
      1855 
     
    
      1679 
      4638531 
      arcA 
      4638531 
      4638329 
      4638329 
      4637613 
      arcA 
      arcA 
      arcAp1 
      GTTTTTGACACTGTCGGGTCCTGAGGGAAAGTACCCACGACCAAGC... 
      ... 
      334 
      666 
      46 
      40 
      1263 
      620 
      14 
      22 
      1038 
      955 
     
    
      1680 
      4638535 
      arcA 
      4638535 
      4638329 
      4638329 
      4637613 
      arcA 
      arcA 
      arcAp2 
      AGCCGTTTTTGACACTGTCGGGTCCTGAGGGAAAGTACCCACGACC... 
      ... 
      335 
      674 
      46 
      40 
      1270 
      630 
      14 
      22 
      1046 
      966 
     
    
      1681 
      4638558 
      arcA 
      4638558 
      4638329 
      4638329 
      4637613 
      arcA 
      arcA 
      arcAp3 
      GCTGTTAAAATGGTTAGGATGACAGCCGTTTTTGACACTGTCGGGT... 
      ... 
      347 
      693 
      46 
      41 
      1284 
      664 
      14 
      23 
      1082 
      999 
     
    
      1682 
      4638622 
      arcA 
      4638622 
      4638329 
      4638329 
      4637613 
      arcA 
      arcA 
      arcAp4 
      ACTTGATATATGTCAACGAAGCGTAGTTTTATTGGGTGTCCGGCCC... 
      ... 
      376 
      741 
      48 
      41 
      1337 
      717 
      14 
      26 
      1186 
      1078 
     
    
      1683 
      4638704 
      arcA 
      4638704 
      4638329 
      4638329 
      4637613 
      arcA 
      arcA 
      arcAp5 
      TAGTTGGATTATTAAAATAATGTGACGAAAGCTAGCATTTAGATAC... 
      ... 
      420 
      797 
      54 
      45 
      1385 
      780 
      15 
      29 
      1310 
      1216 
     
    
      1684 
      4638711 
      arcA 
      4638711 
      4638329 
      4638329 
      4637613 
      arcA 
      arcA 
      arcAp6 
      ATGCAACTAGTTGGATTATTAAAATAATGTGACGAAAGCTAGCATT... 
      ... 
      425 
      800 
      54 
      45 
      1387 
      780 
      15 
      29 
      1318 
      1225 
     
    
      1685 
      4638824 
      arcA 
      4638824 
      4638329 
      4638329 
      4637613 
      arcA 
      arcA 
      arcAp7 
      CTGTACTAACGGTTGAGTTGTTAAAAAATGCTACATATCCTTCTGT... 
      ... 
      457 
      843 
      56 
      48 
      1420 
      835 
      15 
      32 
      1408 
      1333 
     
    
      1686 
      4638861 
      yjtD 
      4638965 
      4638861 
      4639651 
      4638965 
      yjtD 
      yjtD 
      yjtDp8 
      GGGCTTTTTCTGCGACTTACGTTAAGAATTTGTAAATTCGCACCGC... 
      ... 
      185 
      232 
      23 
      25 
      272 
      268 
      10 
      16 
      362 
      297 
     
  
1687 rows × 35 columns
In [5]:
    
id_vars = ['TSS','TU_name','coord_5','coord_3','gene', 'UTR_length']
value_vars = ['s9','s17','s19','s9+bcm','s17+bcm','s19+bcm']
dfn = dfm.copy()
# Normalize counts by gene and utr length
def norm_orf(barcode, rec):
    return float(rec[barcode] / abs(rec['first_gene_5'] - rec['first_gene_3']))
def norm_utr(barcode, rec):
    return float(rec['utr_{0}'.format(barcode)] / rec['UTR_length'])
for barcode in barcodes:
    dfn['orf_{0}'.format(barcode)] = dfn.apply(lambda rec: norm_orf(barcode, rec), axis=1)
    dfn['utr_{0}'.format(barcode)] = dfn.apply(lambda rec: norm_utr(barcode, rec), axis=1)
    
df = dfn[id_vars].copy()
# Take means across replicates according to the samples dict
for sample, bcs in samples.items():
    df['orf_{0}'.format(sample)] = np.log10(dfn[['orf_{0}'.format(b) for b in list(bcs)]].mean(axis=1))
    df['utr_{0}'.format(sample)] = np.log10(dfn[['utr_{0}'.format(b) for b in list(bcs)]].mean(axis=1))
df
    
    Out[5]:
  
    
       
      TSS 
      TU_name 
      coord_5 
      coord_3 
      gene 
      UTR_length 
      orf_s9 
      utr_s9 
      orf_s19+bcm 
      utr_s19+bcm 
      orf_s9+bcm 
      utr_s9+bcm 
      orf_s17+bcm 
      utr_s17+bcm 
      orf_s19 
      utr_s19 
      orf_s17 
      utr_s17 
     
  
  
    
      0 
      5030 
      yaaX 
      5030 
      5234 
      yaaX 
      204 
      -0.040733 
      0.278642 
      0.415199 
      0.357356 
      0.224628 
      0.247275 
      0.237554 
      0.336283 
      -0.123962 
      -0.040117 
      -0.068171 
      0.262079 
     
    
      1 
      6587 
      yaaA 
      6459 
      6587 
      yaaA 
      128 
      0.040578 
      0.236199 
      0.232518 
      0.432493 
      -0.027730 
      0.180592 
      0.048408 
      0.256402 
      -0.005917 
      0.096910 
      0.068941 
      0.281956 
     
    
      2 
      6615 
      yaaA 
      6459 
      6615 
      yaaA 
      156 
      0.040578 
      0.161944 
      0.232518 
      0.375077 
      -0.027730 
      0.116506 
      0.048408 
      0.199572 
      -0.005917 
      0.015048 
      0.068941 
      0.202202 
     
    
      3 
      11542 
      yaaW 
      11356 
      11542 
      yaaW 
      186 
      -1.158484 
      -1.052029 
      0.825338 
      0.913899 
      0.384203 
      0.536667 
      0.450862 
      0.639776 
      -1.074938 
      -1.014240 
      -1.112727 
      -0.889302 
     
    
      4 
      11913 
      yaaI 
      11786 
      11913 
      yaaI 
      127 
      -1.101231 
      -0.913472 
      0.611366 
      0.548443 
      0.270702 
      0.414710 
      0.369510 
      0.491693 
      -1.226170 
      -1.082614 
      -0.993598 
      -0.836632 
     
    
      5 
      11938 
      yaaI 
      11786 
      11938 
      yaaI 
      152 
      -1.101231 
      -0.926571 
      0.611366 
      0.555747 
      0.270702 
      0.397368 
      0.369510 
      0.493935 
      -1.226170 
      -1.121146 
      -0.993598 
      -0.903090 
     
    
      6 
      12048 
      dnaK-tpke11-dnaJ 
      12048 
      12163 
      dnaK 
      115 
      1.370097 
      1.332965 
      1.966770 
      2.117559 
      1.539609 
      1.618775 
      1.639229 
      1.762711 
      1.643435 
      1.564357 
      1.360430 
      1.520740 
     
    
      7 
      17317 
      nhaAR 
      17317 
      17489 
      nhaA 
      172 
      0.068752 
      -0.024675 
      0.958300 
      1.177100 
      0.466310 
      0.795880 
      0.557119 
      0.969811 
      -0.051339 
      -0.212276 
      -0.026888 
      0.005021 
     
    
      8 
      21210 
      rpsT 
      21078 
      21210 
      rpsT 
      132 
      2.351260 
      2.320020 
      1.655317 
      1.683940 
      1.922250 
      1.840087 
      1.954628 
      1.885721 
      2.264445 
      2.194230 
      2.231317 
      2.171582 
     
    
      9 
      21833 
      ileS-lspA-fkpB-ispH 
      21833 
      22391 
      ileS 
      558 
      0.818969 
      0.927446 
      0.933123 
      1.030067 
      0.554431 
      0.602352 
      0.598032 
      0.705459 
      0.949946 
      0.973649 
      0.793145 
      0.994123 
     
    
      10 
      22034 
      ileS-lspA-fkpB-ispH 
      22034 
      22391 
      ileS 
      357 
      0.818969 
      1.045520 
      0.933123 
      1.187537 
      0.554431 
      0.722297 
      0.598032 
      0.823726 
      0.949946 
      1.111927 
      0.793145 
      1.113475 
     
    
      11 
      22229 
      ileS-lspA-fkpB-ispH 
      22229 
      22391 
      ileS 
      162 
      0.818969 
      1.318179 
      0.933123 
      1.485002 
      0.554431 
      0.983888 
      0.598032 
      1.092924 
      0.949946 
      1.395413 
      0.793145 
      1.388673 
     
    
      12 
      25014 
      lspA-fkpB-ispH 
      25014 
      25207 
      lspA 
      193 
      0.812101 
      1.125894 
      0.775358 
      1.145324 
      0.489258 
      0.808564 
      0.520719 
      0.871899 
      0.882614 
      1.153143 
      0.703779 
      1.084308 
     
    
      13 
      28288 
      dapB 
      28288 
      28374 
      dapB 
      86 
      0.301427 
      0.226870 
      0.370426 
      0.382520 
      0.025426 
      0.014892 
      0.158091 
      0.211630 
      0.413118 
      0.186075 
      0.468394 
      0.343111 
     
    
      14 
      29551 
      carAB 
      29551 
      29651 
      carA 
      100 
      -0.269305 
      -0.055517 
      0.217094 
      0.320146 
      -0.047738 
      -0.026872 
      0.006570 
      0.122216 
      -0.120173 
      -0.019997 
      -0.376895 
      -0.207608 
     
    
      15 
      34218 
      caiF 
      34218 
      34300 
      caiF 
      82 
      -0.647207 
      -0.474481 
      0.753263 
      0.676694 
      0.069921 
      -0.016187 
      0.274684 
      0.226065 
      -0.754612 
      -0.752446 
      -0.685439 
      -0.783480 
     
    
      16 
      35499 
      caiE 
      35371 
      35499 
      caiE 
      128 
      -0.875982 
      -1.028029 
      0.465559 
      0.832559 
      -0.169335 
      0.258278 
      -0.054431 
      0.389720 
      -0.948030 
      -0.851937 
      -0.925754 
      -1.028029 
     
    
      17 
      42037 
      caiTABCDE 
      41931 
      42037 
      caiT 
      106 
      -1.773586 
      -1.849215 
      0.077073 
      0.069863 
      -0.375986 
      -0.396917 
      -0.180126 
      -0.243550 
      -1.732968 
      -1.849215 
      -1.655081 
      -1.724276 
     
    
      18 
      45592 
      yaaU 
      45592 
      45807 
      yaaU 
      215 
      -1.275989 
      -1.235528 
      0.372890 
      0.739259 
      -0.173083 
      0.206010 
      0.008722 
      0.431364 
      -1.425208 
      -1.311249 
      -1.240517 
      -1.101990 
     
    
      19 
      47080 
      kefFC 
      47080 
      47246 
      kefF 
      166 
      -0.520156 
      -0.464233 
      0.424789 
      0.433587 
      -0.015855 
      -0.041131 
      0.153383 
      0.102111 
      -0.592337 
      -0.581619 
      -0.438719 
      -0.384418 
     
    
      20 
      52034 
      apaGH 
      51606 
      52034 
      apaG 
      428 
      0.687531 
      0.888712 
      0.845591 
      1.039405 
      0.603785 
      0.875940 
      0.665081 
      0.907318 
      0.682296 
      0.903280 
      0.632905 
      0.842481 
     
    
      21 
      52588 
      rsmA-apaGH 
      52430 
      52588 
      rsmA 
      158 
      0.721794 
      1.091823 
      0.854295 
      1.085886 
      0.680933 
      0.908383 
      0.711535 
      0.943732 
      0.754369 
      1.031025 
      0.647533 
      0.993910 
     
    
      22 
      57241 
      lptD 
      57109 
      57241 
      lptD 
      132 
      0.741502 
      1.230739 
      0.619486 
      1.040944 
      0.439668 
      0.893737 
      0.495573 
      0.995535 
      0.758222 
      1.122836 
      0.769439 
      1.258369 
     
    
      23 
      57336 
      lptD-surA-pdxA 
      57109 
      57336 
      lptD 
      227 
      0.741502 
      1.019180 
      0.619486 
      0.839320 
      0.439668 
      0.685169 
      0.495573 
      0.788237 
      0.758222 
      0.916628 
      0.769439 
      1.042955 
     
    
      24 
      57268 
      djlA 
      57268 
      57364 
      djlA 
      96 
      0.010789 
      0.200999 
      0.388350 
      0.396127 
      0.114967 
      0.112898 
      0.147837 
      0.250725 
      0.090360 
      0.246898 
      -0.009699 
      0.152861 
     
    
      25 
      60450 
      rluA 
      60346 
      60450 
      rluA 
      104 
      0.466672 
      0.894657 
      0.596088 
      0.988362 
      0.542086 
      0.894391 
      0.554211 
      0.911874 
      0.611512 
      1.014981 
      0.423407 
      0.749008 
     
    
      26 
      70241 
      araC 
      70241 
      70387 
      araC 
      146 
      -0.668183 
      -0.375478 
      0.263466 
      0.670386 
      -0.069593 
      0.208559 
      0.074790 
      0.278910 
      -0.524365 
      -0.242666 
      -0.055596 
      0.177082 
     
    
      27 
      73085 
      thiQ 
      72927 
      73085 
      thiQ 
      158 
      -0.473713 
      0.113097 
      0.261314 
      0.654432 
      -0.050065 
      0.409869 
      0.038100 
      0.456481 
      -0.487830 
      0.069687 
      -0.499463 
      0.041892 
     
    
      28 
      79594 
      leuD 
      79453 
      79594 
      leuD 
      141 
      -0.572240 
      -0.378367 
      -0.017206 
      0.153977 
      -0.362626 
      -0.248852 
      -0.311198 
      -0.209700 
      -0.546227 
      -0.259917 
      -0.676245 
      -0.459023 
     
    
      29 
      84024 
      leuLABCD 
      83708 
      84024 
      leuL 
      316 
      0.425337 
      -0.275672 
      0.438414 
      0.076654 
      0.150078 
      -0.301030 
      0.247345 
      -0.149439 
      0.354421 
      -0.431501 
      0.684595 
      -0.060354 
     
    
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
      ... 
     
    
      1657 
      4589390 
      yjiY 
      4589302 
      4589390 
      yjiY 
      88 
      1.239090 
      1.704170 
      1.505317 
      1.974543 
      1.181578 
      1.578222 
      1.325749 
      1.755918 
      1.617327 
      2.031294 
      1.392771 
      1.777287 
     
    
      1658 
      4589435 
      tsr 
      4589435 
      4589680 
      tsr 
      245 
      -0.675973 
      1.257140 
      1.408266 
      1.537845 
      0.992936 
      1.217215 
      1.106440 
      1.305702 
      -0.757149 
      1.599549 
      -0.400572 
      1.377470 
     
    
      1659 
      4592860 
      yjjL 
      4592745 
      4592860 
      yjjL 
      115 
      -0.542237 
      -0.529219 
      1.732302 
      1.758879 
      1.075871 
      1.395668 
      1.164711 
      1.477310 
      -0.528015 
      -0.458638 
      -0.393890 
      -0.113755 
     
    
      1660 
      4592867 
      yjjL 
      4592745 
      4592867 
      yjjL 
      122 
      -0.542237 
      -0.542292 
      1.732302 
      1.753024 
      1.075871 
      1.391412 
      1.164711 
      1.471989 
      -0.528015 
      -0.468312 
      -0.393890 
      -0.129711 
     
    
      1661 
      4592905 
      yjjL 
      4592745 
      4592905 
      yjjL 
      160 
      -0.542237 
      -0.546109 
      1.732302 
      1.735150 
      1.075871 
      1.393191 
      1.164711 
      1.473350 
      -0.528015 
      -0.518378 
      -0.393890 
      -0.176770 
     
    
      1662 
      4593970 
      yjjM 
      4593874 
      4593970 
      yjjM 
      96 
      -0.697310 
      -0.090177 
      1.489834 
      1.977462 
      1.152512 
      1.325225 
      1.316606 
      1.433786 
      -0.617538 
      -0.071114 
      -0.297245 
      -0.146581 
     
    
      1663 
      4598360 
      yjjA 
      4598212 
      4598360 
      yjjA 
      148 
      0.320163 
      0.413504 
      1.117177 
      1.074634 
      1.190927 
      1.019369 
      1.255053 
      1.207409 
      0.268406 
      0.468228 
      0.272884 
      0.444636 
     
    
      1664 
      4599638 
      dnaTC-yjjA 
      4599540 
      4599638 
      dnaT 
      98 
      0.185128 
      -0.423024 
      1.254556 
      1.361535 
      1.252353 
      1.430378 
      1.341084 
      1.475419 
      0.096749 
      -0.447158 
      0.078980 
      -0.287935 
     
    
      1665 
      4599638 
      dnaTC 
      4599540 
      4599638 
      dnaT 
      98 
      0.185128 
      -0.423024 
      1.254556 
      1.361535 
      1.252353 
      1.430378 
      1.341084 
      1.475419 
      0.096749 
      -0.447158 
      0.078980 
      -0.287935 
     
    
      1666 
      4600200 
      yjjB-dnaTC-yjjA 
      4599973 
      4600200 
      yjjB 
      227 
      -0.818612 
      0.228870 
      1.151988 
      1.064507 
      1.235552 
      1.133862 
      1.293640 
      1.234482 
      -0.749790 
      0.157857 
      -0.615591 
      0.184304 
     
    
      1667 
      4600200 
      yjjB-dnaTC 
      4599973 
      4600200 
      yjjB 
      227 
      -0.818612 
      0.228870 
      1.151988 
      1.064507 
      1.235552 
      1.133862 
      1.293640 
      1.234482 
      -0.749790 
      0.157857 
      -0.615591 
      0.184304 
     
    
      1668 
      4601057 
      yjjP 
      4600881 
      4601057 
      yjjP 
      176 
      -0.534308 
      0.244446 
      1.491480 
      1.477697 
      1.542712 
      1.427416 
      1.677174 
      1.518102 
      -0.503574 
      0.245148 
      -0.339948 
      0.185046 
     
    
      1669 
      4601342 
      yjjQ-bglJ 
      4601342 
      4601500 
      yjjQ 
      158 
      -0.925840 
      0.526028 
      1.295772 
      1.543675 
      1.192664 
      1.568387 
      1.335313 
      1.663206 
      -0.915855 
      0.430753 
      -0.714210 
      0.386239 
     
    
      1670 
      4603775 
      fhuF 
      4603686 
      4603775 
      fhuF 
      89 
      0.459404 
      -0.363929 
      1.698181 
      1.824031 
      1.653873 
      1.715299 
      1.749789 
      1.886522 
      0.606583 
      -0.331342 
      0.487199 
      -0.129846 
     
    
      1671 
      4604296 
      leuV 
      4604188 
      4604296 
      leuV 
      108 
      0.798297 
      -0.277549 
      1.168963 
      2.021649 
      1.233557 
      1.892043 
      1.022869 
      1.946443 
      0.869982 
      -0.170101 
      0.650398 
      -0.106567 
     
    
      1672 
      4605891 
      rsmC 
      4605723 
      4605891 
      rsmC 
      168 
      0.498959 
      0.206054 
      0.194645 
      2.267668 
      0.066826 
      2.166076 
      0.061558 
      2.291781 
      0.439448 
      0.094837 
      0.342958 
      0.233329 
     
    
      1673 
      4609176 
      osmY 
      4609176 
      4609419 
      osmY 
      243 
      -0.298882 
      0.840607 
      1.111562 
      0.706763 
      0.853829 
      0.655194 
      0.940426 
      0.663224 
      -0.366782 
      0.791064 
      -0.292500 
      0.764228 
     
    
      1674 
      4614702 
      deoCABD 
      4614702 
      4615346 
      deoC 
      644 
      0.542952 
      -0.919584 
      0.928006 
      0.634925 
      0.475818 
      0.064144 
      0.719282 
      0.125613 
      0.622080 
      -1.132192 
      1.070690 
      -0.826615 
     
    
      1675 
      4619680 
      yjjJ 
      4619680 
      4619792 
      yjjJ 
      112 
      -0.256711 
      1.588822 
      0.431037 
      1.435651 
      0.108564 
      1.125859 
      0.191477 
      1.166023 
      -0.165137 
      1.598605 
      0.001791 
      1.625230 
     
    
      1676 
      4622261 
      lplA 
      4622140 
      4622261 
      lplA 
      121 
      -0.410297 
      -0.135842 
      0.361858 
      0.756064 
      0.025120 
      0.409975 
      0.042130 
      0.513812 
      -0.143273 
      -0.123744 
      -0.323847 
      0.233185 
     
    
      1677 
      4631922 
      yjjX 
      4631768 
      4631922 
      yjjX 
      154 
      0.274582 
      -0.391641 
      0.779492 
      0.834082 
      0.528148 
      0.471444 
      0.552469 
      0.546479 
      0.187256 
      -0.292651 
      0.281956 
      -0.166331 
     
    
      1678 
      4633266 
      creABC 
      4633266 
      4633544 
      creA 
      278 
      -0.010689 
      0.108015 
      0.628119 
      0.823596 
      0.221665 
      0.625253 
      0.290576 
      0.610568 
      0.127570 
      0.013837 
      0.104735 
      0.138587 
     
    
      1679 
      4638531 
      arcA 
      4638329 
      4638531 
      arcA 
      202 
      1.193160 
      -1.087867 
      1.401348 
      0.693126 
      0.825740 
      0.393619 
      0.841006 
      0.668469 
      1.216159 
      -1.050079 
      1.167144 
      -0.671883 
     
    
      1680 
      4638535 
      arcA 
      4638329 
      4638535 
      arcA 
      206 
      1.193160 
      -1.096383 
      1.401348 
      0.688731 
      0.825740 
      0.388994 
      0.841006 
      0.663856 
      1.216159 
      -1.058595 
      1.167144 
      -0.680399 
     
    
      1681 
      4638558 
      arcA 
      4638329 
      4638558 
      arcA 
      229 
      1.193160 
      -1.104563 
      1.401348 
      0.657407 
      0.825740 
      0.356168 
      0.841006 
      0.628723 
      1.216159 
      -1.092664 
      1.167144 
      -0.721346 
     
    
      1682 
      4638622 
      arcA 
      4638329 
      4638622 
      arcA 
      293 
      1.193160 
      -1.188114 
      1.401348 
      0.586979 
      0.825740 
      0.280156 
      0.841006 
      0.544703 
      1.216159 
      -1.165838 
      1.167144 
      -0.818508 
     
    
      1683 
      4638704 
      arcA 
      4638329 
      4638704 
      arcA 
      375 
      1.193160 
      -1.262277 
      1.401348 
      0.527372 
      0.825740 
      0.210229 
      0.841006 
      0.460397 
      1.216159 
      -1.231609 
      1.167144 
      -0.879426 
     
    
      1684 
      4638711 
      arcA 
      4638329 
      4638711 
      arcA 
      382 
      1.193160 
      -1.259844 
      1.401348 
      0.522253 
      0.825740 
      0.205043 
      0.841006 
      0.452766 
      1.216159 
      -1.239641 
      1.167144 
      -0.887458 
     
    
      1685 
      4638824 
      arcA 
      4638329 
      4638824 
      arcA 
      495 
      1.193160 
      -1.323537 
      1.401348 
      0.442274 
      0.825740 
      0.118308 
      0.841006 
      0.357511 
      1.216159 
      -1.323537 
      1.167144 
      -0.978602 
     
    
      1686 
      4638861 
      yjtD 
      4638861 
      4638965 
      yjtD 
      104 
      -0.532049 
      -0.799549 
      -0.009925 
      0.500822 
      -0.479343 
      0.302073 
      -0.367239 
      0.414330 
      -0.272843 
      -0.903090 
      -0.499864 
      -0.636822 
     
  
1687 rows × 18 columns
Two clusters are apparent. We are after the UTRs that are upregulated by the addition of BCM (cloud of points in the left part of the plot along y=0 line and in general (significantly) above y=x line).
BTW, the point size is the length of UTR. No (apparent) correlation here.
In [6]:
    
p = ggplot(df, aes(x='utr_s9', y='utr_s9+bcm', size='UTR_length')) \
        + geom_point(alpha=0.1) \
        + geom_abline(slope=1, intercept=0, size=2.5, color='#586e75')
print(p)
    
    
 
    
<ggplot: (8763937444749)>
In [8]:
    
p = ggplot(df, aes(x='utr_s9', y='utr_s19', size='UTR_length')) \
        + geom_point(alpha=0.1) \
        + geom_abline(slope=1, intercept=0, size=2.5, color='#586e75')
print(p)
    
    
 
    
<ggplot: (-9223363272917432888)>
In [9]:
    
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import euclidean_distances
from sklearn.neighbors import kneighbors_graph
from sklearn import cluster
from sklearn import mixture
X = df.as_matrix(columns=['utr_s9', 'utr_s9+bcm'])
X = StandardScaler().fit_transform(X)
bandwidth = cluster.estimate_bandwidth(X, quantile=0.3)
connectivity = kneighbors_graph(X, n_neighbors=20)
connectivity = 0.05 * (connectivity + connectivity.T)
#distances = euclidean_distances(X)
gmm = mixture.GMM(n_components=2, covariance_type='full')
ms = cluster.MeanShift(bandwidth=bandwidth, bin_seeding=True)
two_means = cluster.MiniBatchKMeans(n_clusters=2, batch_size=200)
kmeans = cluster.KMeans(n_clusters=2)
ward = cluster.AgglomerativeClustering(n_clusters=2, linkage='ward', connectivity=connectivity)
spectral = cluster.SpectralClustering(n_clusters=2, n_neighbors=20, eigen_solver='arpack', affinity='nearest_neighbors')
dbscan = cluster.DBSCAN(eps=.5)
affinity_propagation = cluster.AffinityPropagation(damping=.95, preference=-200)
average_linkage = cluster.AgglomerativeClustering(linkage='average', affinity='cityblock', n_clusters=2, connectivity=connectivity)
for name, alg in [
                    ('MiniBatchKMeans', two_means),
                    ('KMeans', kmeans),
                    ('AffinityPropagation', affinity_propagation),
                    ('MeanShift', ms),
                    ('GMM', gmm),
                    ('SpectralClustering', spectral),
                    ('Ward', ward),
                    ('AgglomerativeClustering', average_linkage),
                    ('DBSCAN', dbscan)
                ]:
    alg.fit(X)
    if hasattr(alg, 'labels_'):
        df['label'] = alg.labels_.astype(np.int)
    else:
        df['label'] = alg.predict(X)
    
    p = ggplot(df, aes(x='utr_s9', y='utr_s9+bcm', color='label')) \
        + geom_point(alpha=0.5) \
        + ggtitle(name) \
        + geom_abline(slope=1, intercept=0, size=2.5, color='#586e75')
    print(p)
    
    
/home/ilya/.venv/pydata3/lib/python3.4/site-packages/sklearn/neighbors/graph.py:37: DeprecationWarning: The behavior of 'kneighbors_graph' when mode='connectivity' will change in version 0.18. Presently, the nearest neighbor of each sample is the sample itself. Beginning in version 0.18, the default behavior will be to exclude each sample from being its own nearest neighbor. To maintain the current behavior, set include_self=True.
  "behavior, set include_self=True.", DeprecationWarning)
    
 
    
<ggplot: (8763937443690)>
    
 
    
<ggplot: (8763931127223)>
    
 
    
<ggplot: (8763924495480)>
    
 
    
<ggplot: (8763937442119)>
    
 
    
<ggplot: (-9223363272930384440)>
    
 
    
<ggplot: (-9223363272930297831)>
    
 
    
<ggplot: (-9223363272930371019)>
    
 
    
<ggplot: (8763924531895)>
    
 
    
<ggplot: (8763924256765)>
In [ ]:
    
X = df.as_matrix
    
Content source: eco32i/biodata
Similar notebooks: