In [1]:
%matplotlib inline

In [2]:
#from __future__ import division
import pandas as pd
import numpy as np
from ggplot import *
import matplotlib.pyplot as plt
import seaborn as sns

import os
import sys

In [3]:
# Sample titles with corresponding barcodes
# s9: WT
# s9+bcm: WT +BCM
# s17: triple sRNA mutant

samples = {
    's9': ['ATCACG', 'ACAGTG'],
    's9+bcm': ['CGATGT', 'GCCAAT'],
    's17': ['TTAGGC', 'GATCAG'],
    }

# Barcodes
barcodes = ['ATCACG', 'ACAGTG', 'CGATGT', 'GCCAAT', 'TTAGGC', 'GATCAG']

In [4]:
offsets = [150,200,300]
winsizes = [50,80,100,200]
output_tpl = '../results/dfa_mp.offset_{}.win_{}.csv'

output = []

for offset in offsets:
    for winsize in winsizes:
        df = pd.DataFrame.from_csv(output_tpl.format(offset, winsize))
        df['win'] = winsize
        df['offset'] = offset
        output.append(df)
        
dfa = pd.concat(output)

In [5]:
dfa['UTR_length'] = dfa['end_x'] - dfa['start_x']
dfa


Out[5]:
TSS end_x start_x gene strand_x end_y start_y strand_y strand ratio_ATCACG ratio_ACAGTG ratio_CGATGT ratio_GCCAAT win offset UTR_length
0 148 190 148 thrL + 255.0 190.0 + + 3.000000 2.784355 0.911828 3.178117 50 150 42
1 148 190 148 thrL + 255.0 190.0 + + 3.000000 2.784355 0.911828 3.178117 50 150 42
2 5030 5234 5030 yaaX + 5530.0 5234.0 + + 4.576923 6.983333 1.264901 1.436242 50 150 204
3 6587 6587 6459 yaaA - 6459.0 5683.0 - - 0.032028 0.072193 0.567568 0.600000 50 150 128
4 6615 6615 6459 yaaA - 6459.0 5683.0 - - 0.034091 0.090379 0.654135 0.582011 50 150 156
5 8017 8017 7959 yaaJ - 7959.0 6529.0 - - 0.875000 0.571429 0.885246 1.196262 50 150 58
6 8191 8238 8191 talB + 9191.0 8238.0 + + 0.478825 0.513356 0.473950 0.564393 50 150 47
9 11542 11542 11356 yaaW - 11356.0 10643.0 - - 0.666667 1.777778 1.327273 1.012658 50 150 186
10 11825 11825 11786 yaaI - 11786.0 11382.0 - - 0.500000 2.625000 0.652330 0.474874 50 150 39
11 11913 11913 11786 yaaI - 11786.0 11382.0 - - 0.333333 0.555556 1.748148 1.713376 50 150 127
12 11938 11938 11786 yaaI - 11786.0 11382.0 - - 0.857143 0.428571 1.100592 1.442623 50 150 152
13 12048 12163 12048 dnaK + 14079.0 12163.0 + + 0.252212 0.207481 0.171599 0.301158 50 150 115
14 12123 12163 12123 dnaK + 14079.0 12163.0 + + 0.869191 0.539653 0.430504 1.010352 50 150 40
15 12144 12163 12144 dnaK + 14079.0 12163.0 + + 0.979294 0.717621 0.513948 1.066012 50 150 19
18 16951 16951 16903 hokC - 16903.0 16751.0 - - 0.478261 0.569767 0.599631 0.459902 50 150 48
19 17317 17489 17317 nhaA + 18655.0 17489.0 + + 0.052632 0.126904 2.822222 1.647166 50 150 172
20 17458 17489 17458 nhaA + 18655.0 17489.0 + + 1.067073 1.989583 0.762238 1.602339 50 150 31
21 21120 21120 21078 rpsT - 21078.0 20815.0 - - 0.752518 0.615503 0.493768 0.752228 50 150 42
22 21210 21210 21078 rpsT - 21078.0 20815.0 - - 0.278619 0.579581 0.220507 0.358928 50 150 132
23 21383 21407 21383 ribF + 22348.0 21407.0 + + 0.922207 1.056693 0.849432 0.966921 50 150 24
24 21833 22391 21833 ileS + 25207.0 22391.0 + + 1.352113 1.040936 1.098859 1.163180 50 150 558
25 22034 22391 22034 ileS + 25207.0 22391.0 + + 0.528970 0.743542 0.934363 0.388699 50 150 357
26 22229 22391 22229 ileS + 25207.0 22391.0 + + 0.418221 0.240061 0.299776 0.510862 50 150 162
27 25014 25207 25014 lspA + 25701.0 25207.0 + + 0.850227 0.498730 0.592040 0.854137 50 150 193
28 28288 28374 28288 dapB + 29195.0 28374.0 + + 0.544828 1.341176 0.757576 0.496063 50 150 86
29 28343 28374 28343 dapB + 29195.0 28374.0 + + 1.752809 1.933333 1.785714 1.243902 50 150 31
30 29551 29651 29551 carA + 30799.0 29651.0 + + 0.790000 0.430233 0.240310 0.424419 50 150 100
31 29619 29651 29619 carA + 30799.0 29651.0 + + 0.788462 0.435897 0.461957 0.466912 50 150 32
32 30775 30817 30775 carB + 34038.0 30817.0 + + 0.513514 0.761194 0.406593 1.136000 50 150 42
33 34218 34300 34218 caiF + 34695.0 34300.0 + + 0.764706 1.388889 0.357143 0.403846 50 150 82
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
3754 4609344 4609414 4609344 prfC + 4611003.0 4609414.0 + + 1.043222 0.723374 0.815589 1.112554 200 300 70
3755 4609356 4609414 4609356 prfC + 4611003.0 4609414.0 + + 1.113715 0.751312 0.856031 1.154138 200 300 58
3756 4611153 4611396 4611153 osmY + 4612001.0 4611396.0 + + 1.070175 1.486726 0.866915 0.928707 200 300 243
3757 4616679 4617323 4616679 deoC + 4618102.0 4617323.0 + + 1.140625 0.526882 1.102041 0.934307 200 300 644
3758 4617278 4617323 4617278 deoC + 4618102.0 4617323.0 + + 1.826840 2.649815 1.678423 2.319249 200 300 45
3759 4619567 4619603 4619567 deoB + 4620826.0 4619603.0 + + 0.520743 0.548993 0.379441 0.564190 200 300 36
3760 4621657 4621769 4621657 yjjJ + 4623100.0 4621769.0 + + 3.920833 14.337209 1.594747 1.465487 200 300 112
3761 4621716 4621769 4621716 yjjJ + 4623100.0 4621769.0 + + 1.156682 2.305970 0.783037 0.875229 200 300 53
3762 4624238 4624238 4624117 lplA - 4624117.0 4623101.0 - - 2.214286 1.905263 1.272436 1.488294 200 300 121
3763 4624799 4624799 4624789 ytjB - 4624789.0 4624145.0 - - 1.145985 1.015267 0.684332 0.680734 200 300 10
3764 4624856 4624895 4624856 serB + 4625863.0 4624895.0 + + 1.471910 2.332288 1.002410 1.592798 200 300 39
3765 4630566 4630566 4630522 yjjK - 4630522.0 4628855.0 - - 0.975590 1.301775 0.613567 0.892770 200 300 44
3766 4630700 4630733 4630700 slt + 4632670.0 4630733.0 + + 0.843023 0.823171 0.951872 0.894191 200 300 33
3767 4632704 4632760 4632704 trpR + 4633086.0 4632760.0 + + 1.302372 1.444231 0.835112 0.975930 200 300 56
3768 4633773 4633773 4633745 yjjX - 4633745.0 4633233.0 - - 3.361868 3.631399 1.012745 1.262749 200 300 28
3769 4633899 4633899 4633745 yjjX - 4633745.0 4633233.0 - - 3.716738 5.043478 1.026196 1.508820 200 300 154
3770 4635243 4635521 4635243 creA + 4635994.0 4635521.0 + + 2.986159 2.272251 1.123288 1.083676 200 300 278
3771 4635353 4635353 4635310 rob - 4635310.0 4634441.0 - - 0.902421 0.560804 0.475946 0.853982 200 300 43
3772 4635477 4635521 4635477 creA + 4635994.0 4635521.0 + + 0.989286 1.091716 0.517182 0.904128 200 300 44
3773 4638160 4638178 4638160 creD + 4639530.0 4638178.0 + + 1.642857 3.800000 1.421053 0.748068 200 300 18
3774 4640358 4640402 4640358 yjjY + 4640542.0 4640402.0 + + 13.010830 11.512545 14.250000 7.067416 200 300 44
3775 4640508 4640508 4640306 arcA - 4640306.0 4639590.0 - - 1.163365 0.827256 0.823056 1.180585 200 300 202
3776 4640512 4640512 4640306 arcA - 4640306.0 4639590.0 - - 1.167142 0.837495 0.831858 1.187599 200 300 206
3777 4640535 4640535 4640306 arcA - 4640306.0 4639590.0 - - 1.057403 0.763457 0.804410 1.089905 200 300 229
3778 4640599 4640599 4640306 arcA - 4640306.0 4639590.0 - - 0.867294 0.786859 0.757342 0.862293 200 300 293
3779 4640681 4640681 4640306 arcA - 4640306.0 4639590.0 - - 0.542907 0.452288 0.399267 0.477665 200 300 375
3780 4640688 4640688 4640306 arcA - 4640306.0 4639590.0 - - 0.515849 0.440549 0.386567 0.455797 200 300 382
3781 4640801 4640801 4640306 arcA - 4640306.0 4639590.0 - - 0.089461 0.110785 0.126010 0.168638 200 300 495
3782 4640838 4640942 4640838 yjtD + 4641628.0 4640942.0 + + 1.639535 0.945946 1.095890 2.051546 200 300 104
3783 4640898 4640942 4640898 yjtD + 4641628.0 4640942.0 + + 1.056604 0.763636 0.952381 1.453782 200 300 44

43812 rows × 16 columns


In [7]:
dfa[(dfa['gene'] == 'thiM') & (dfa['UTR_length'] > 80)][[
        'TSS', 'gene', 'strand', 'UTR_length', 
        'ratio_ATCACG', 'ratio_ACAGTG', 'ratio_CGATGT', 'ratio_GCCAAT',
        'win', 'offset'
    ]]


Out[7]:
TSS gene strand UTR_length ratio_ATCACG ratio_ACAGTG ratio_CGATGT ratio_GCCAAT win offset
1834 2185451 thiM - 150 1.584848 0.964413 0.636616 0.751043 50 150
1834 2185451 thiM - 150 1.663934 0.911894 0.656350 0.808535 80 150
1834 2185451 thiM - 150 1.618926 0.937053 0.665323 0.828664 100 150
1834 2185451 thiM - 150 1.976852 1.450852 0.961695 1.173953 200 150
1834 2185451 thiM - 150 1.848057 0.918644 0.581013 0.769231 50 200
1834 2185451 thiM - 150 2.050505 1.023064 0.701254 0.913950 80 200
1834 2185451 thiM - 150 2.055195 1.039683 0.722101 0.968514 100 200
1834 2185451 thiM - 150 2.489796 1.647321 1.057348 1.364324 200 200
1834 2185451 thiM - 150 5.077670 2.779487 0.923541 1.542857 50 300
1834 2185451 thiM - 150 5.342105 2.971292 1.094306 1.779188 80 300
1834 2185451 thiM - 150 5.188525 3.075117 1.122449 1.805164 100 300
1834 2185451 thiM - 150 6.519084 4.834061 1.748148 2.544355 200 300

In [6]:
dfa[(dfa['gene'] == 'rpoS') & (dfa['UTR_length'] > 500)][[
        'TSS', 'gene', 'strand', 'UTR_length', 
        'ratio_ATCACG', 'ratio_ACAGTG', 'ratio_CGATGT', 'ratio_GCCAAT',
        'win', 'offset'
    ]]


Out[6]:
TSS gene strand UTR_length ratio_ATCACG ratio_ACAGTG ratio_CGATGT ratio_GCCAAT win offset
2317 2868118 rpoS - 567 3.881818 5.278481 3.323529 2.467213 50 150
2317 2868118 rpoS - 567 3.496241 5.264368 3.110169 2.340278 80 150
2317 2868118 rpoS - 567 3.453901 4.812183 3.112903 2.250000 100 150
2317 2868118 rpoS - 567 1.802013 2.800000 2.308901 1.290520 200 150
2317 2868118 rpoS - 567 5.207317 9.697674 4.402597 2.980198 50 200
2317 2868118 rpoS - 567 4.558824 6.887218 3.783505 2.632812 80 200
2317 2868118 rpoS - 567 4.234783 6.116129 3.784314 2.448980 100 200
2317 2868118 rpoS - 567 1.704762 3.175719 2.563953 1.128342 200 200
2317 2868118 rpoS - 567 2.259259 4.212121 3.766667 1.468293 50 300
2317 2868118 rpoS - 567 1.945607 3.914530 3.495238 1.225455 80 300
2317 2868118 rpoS - 567 1.837736 3.550562 3.271186 1.157556 100 300
2317 2868118 rpoS - 567 1.309756 1.707904 1.756972 0.829077 200 300

In [8]:
d = dfa[(dfa['UTR_length'] > 80)
        & (dfa['ratio_ATCACG'] > 2)
        & (dfa['offset'] == 200)
        & (dfa['win'] == 80)][['UTR_length', 'ratio_ATCACG','ratio_CGATGT']].copy()
d['log-bcm'] = np.log10(d['ratio_ATCACG'])
d['log+bcm'] = np.log10(d['ratio_CGATGT'])
d['loglen'] = np.log10(d['UTR_length'])

In [9]:
p = ggplot(d, aes(x='loglen', y='log-bcm')) \
        + geom_point(alpha=0.1) \
        + geom_smooth(method='lowess', span=1/5.)
print(p)


<ggplot: (8735717056958)>

In [10]:
p = ggplot(d, aes(x='loglen', y='log-bcm')) \
        + geom_point(alpha=0.1) \
        + geom_smooth(method='ma', window=25)
print(p)


/home/ilya/src/ggplot/ggplot/utils/smoothers.py:61: FutureWarning: pd.rolling_std is deprecated for Series and will be removed in a future version, replace with 
	Series.rolling(center=False,window=25).std()
  std_err = pd.rolling_std(y, window)
/home/ilya/src/ggplot/ggplot/utils/smoothers.py:62: FutureWarning: pd.rolling_mean is deprecated for Series and will be removed in a future version, replace with 
	Series.rolling(center=False,window=25).mean()
  y = pd.rolling_mean(y, window)
<ggplot: (-9223363300952537905)>

In [11]:
p = ggplot(d, aes(x='loglen', y='log+bcm')) \
        + geom_point(alpha=0.1) \
        + geom_smooth(method='lowess', span=1/5.) \
        + scale_y_continuous(limits=(-1,2.5))
print(p)


<ggplot: (8735901100387)>

In [12]:
p = ggplot(d, aes(x='loglen', y='log+bcm')) \
        + geom_point(alpha=0.1) \
        + geom_smooth(method='ma', window=25) \
        + scale_y_continuous(limits=(-1,2.5))
print(p)


/home/ilya/src/ggplot/ggplot/utils/smoothers.py:61: FutureWarning: pd.rolling_std is deprecated for Series and will be removed in a future version, replace with 
	Series.rolling(center=False,window=25).std()
  std_err = pd.rolling_std(y, window)
/home/ilya/src/ggplot/ggplot/utils/smoothers.py:62: FutureWarning: pd.rolling_mean is deprecated for Series and will be removed in a future version, replace with 
	Series.rolling(center=False,window=25).mean()
  y = pd.rolling_mean(y, window)
<ggplot: (-9223363300953698893)>

In [13]:
d = dfa[(dfa['UTR_length'] > 80)
        & (dfa['ratio_ATCACG'] > 2)
        & (dfa['offset'] == 200)
        & (dfa['win'] == 80)][['TSS', 'gene', 'UTR_length', 'ratio_ATCACG','ratio_CGATGT']].copy()
d['log-bcm'] = np.log10(d['ratio_ATCACG'])
d['log+bcm'] = np.log10(d['ratio_CGATGT'])
d['loglen'] = np.log10(d['UTR_length'])

In [14]:
d[d['gene'] == 'rpoS']


Out[14]:
TSS gene UTR_length ratio_ATCACG ratio_CGATGT log-bcm log+bcm loglen
2317 2868118 rpoS 567 4.558824 3.783505 0.658853 0.577894 2.753583

In [15]:
p = ggplot(d, aes(x='loglen', y='ratio_ATCACG')) \
        + geom_point(alpha=0.1) \
        + geom_smooth(method='lowess', span=1/17.)
print(p)


<ggplot: (-9223363300952519943)>

In [16]:
p = ggplot(d, aes(x='loglen', y='ratio_ATCACG')) \
        + geom_point(alpha=0.1) \
        + geom_smooth(method='ma', window=20)
print(p)


/home/ilya/src/ggplot/ggplot/utils/smoothers.py:61: FutureWarning: pd.rolling_std is deprecated for Series and will be removed in a future version, replace with 
	Series.rolling(center=False,window=20).std()
  std_err = pd.rolling_std(y, window)
/home/ilya/src/ggplot/ggplot/utils/smoothers.py:62: FutureWarning: pd.rolling_mean is deprecated for Series and will be removed in a future version, replace with 
	Series.rolling(center=False,window=20).mean()
  y = pd.rolling_mean(y, window)
<ggplot: (8735902227355)>

In [17]:
p = ggplot(d, aes(x='loglen', y='ratio_ATCACG')) \
        + geom_point(alpha=0.1) \
        + scale_y_continuous(limits=(0,10))
print(p)


<ggplot: (-9223363300952520083)>

In [18]:
p = ggplot(d, aes(x='loglen', y='ratio_CGATGT')) \
        + geom_point(alpha=0.1) \
        + scale_y_continuous(limits=(0,10))
print(p)


<ggplot: (8735901096929)>

In [19]:
dfa[(dfa['gene'] == 'fadE')][[
        'TSS', 'gene', 'strand', 'UTR_length', 
        'ratio_ATCACG', 'ratio_ACAGTG', 'ratio_CGATGT', 'ratio_GCCAAT',
        'win', 'offset'
    ]]


Out[19]:
TSS gene strand UTR_length ratio_ATCACG ratio_ACAGTG ratio_CGATGT ratio_GCCAAT win offset
195 243406 fadE - 103 0.600000 0.272727 0.767347 0.562500 50 150
195 243406 fadE - 103 0.750000 0.653846 0.823333 0.775194 80 150
195 243406 fadE - 103 0.750000 0.777778 0.848765 0.829684 100 150
195 243406 fadE - 103 0.789474 0.529412 0.813653 0.925217 200 150
195 243406 fadE - 103 0.666667 0.500000 0.770492 0.718631 50 200
195 243406 fadE - 103 0.818182 0.894737 0.812500 0.934579 80 200
195 243406 fadE - 103 0.818182 0.840000 0.811209 0.963277 100 200
195 243406 fadE - 103 0.882353 0.642857 0.807692 1.074747 200 200
195 243406 fadE - 103 0.600000 0.200000 0.632997 0.790795 50 300
195 243406 fadE - 103 0.818182 0.500000 0.713873 1.060071 80 300
195 243406 fadE - 103 0.818182 0.583333 0.721785 1.110749 100 300
195 243406 fadE - 103 1.000000 0.562500 0.809174 1.260664 200 300

In [20]:
d = dfa[(dfa['UTR_length'] > 80)
        & (dfa['ratio_ATCACG'] > 2)][[
            'TSS', 'gene', 'UTR_length', 
            'ratio_ATCACG','ratio_CGATGT', 'offset', 'win']].copy()
d['log-bcm'] = np.log10(d['ratio_ATCACG'])
d['log+bcm'] = np.log10(d['ratio_CGATGT'])
d['loglen'] = np.log10(d['UTR_length'])

In [21]:
p = ggplot(d, aes(x='loglen', y='log-bcm')) \
        + geom_point(alpha=0.1, size=1) \
        + geom_smooth(method='lowess', span=1/5.) \
        + facet_grid('offset ~ win')
print(p)


<ggplot: (8735902227646)>

In [22]:
p = ggplot(d, aes(x='loglen', y='log-bcm')) \
        + geom_point(alpha=0.1, size=1) \
        + geom_smooth(method='ma', window=20) \
        + facet_grid('offset ~ win')
print(p)


/home/ilya/src/ggplot/ggplot/utils/smoothers.py:61: FutureWarning: pd.rolling_std is deprecated for Series and will be removed in a future version, replace with 
	Series.rolling(center=False,window=20).std()
  std_err = pd.rolling_std(y, window)
/home/ilya/src/ggplot/ggplot/utils/smoothers.py:62: FutureWarning: pd.rolling_mean is deprecated for Series and will be removed in a future version, replace with 
	Series.rolling(center=False,window=20).mean()
  y = pd.rolling_mean(y, window)
<ggplot: (-9223363300954013892)>

In [23]:
p = ggplot(d, aes(x='loglen', y='log+bcm')) \
        + geom_point(alpha=0.1, size=1) \
        + geom_smooth(method='lowess', span=1/5.) \
        + facet_grid('offset ~ win')
print(p)


<ggplot: (8735902227569)>

In [24]:
p = ggplot(d, aes(x='loglen', y='log+bcm')) \
        + geom_point(alpha=0.1, size=1) \
        + geom_smooth(method='ma', window=20) \
        + facet_grid('offset ~ win')
print(p)


/home/ilya/src/ggplot/ggplot/utils/smoothers.py:61: FutureWarning: pd.rolling_std is deprecated for Series and will be removed in a future version, replace with 
	Series.rolling(center=False,window=20).std()
  std_err = pd.rolling_std(y, window)
/home/ilya/src/ggplot/ggplot/utils/smoothers.py:62: FutureWarning: pd.rolling_mean is deprecated for Series and will be removed in a future version, replace with 
	Series.rolling(center=False,window=20).mean()
  y = pd.rolling_mean(y, window)
<ggplot: (-9223363300953630460)>

Long UTRs


In [25]:
d = dfa[(dfa['UTR_length'] > 80)
        & (dfa['UTR_length'] < 600)
        & (dfa['ratio_ATCACG'] > 2)
        & (dfa['offset'] == 200)
        & (dfa['win'] == 80)][['TSS', 'gene', 'UTR_length', 'ratio_ATCACG','ratio_CGATGT']].copy()
d['log-bcm'] = np.log2(d['ratio_ATCACG'])
d['log+bcm'] = np.log2(d['ratio_CGATGT'])
d['loglen'] = np.log2(d['UTR_length'])
d['diff'] = d['log-bcm'] - d['log+bcm']

d1 = d[['UTR_length', 'loglen', 'log-bcm']].rename(columns={'log-bcm': 'logratio'})
d1['bcm'] = '-'
d2 = d[['UTR_length', 'loglen', 'log+bcm']].rename(columns={'log+bcm': 'logratio'})
d2['bcm'] = '+'

_d = pd.concat([d1, d2])

In [26]:
d[d['gene'] == 'rpoS']


Out[26]:
TSS gene UTR_length ratio_ATCACG ratio_CGATGT log-bcm log+bcm loglen diff
2317 2868118 rpoS 567 4.558824 3.783505 2.188662 1.919723 9.147205 0.268938

In [27]:
p = ggplot(_d, aes(x='UTR_length', y='logratio', color='bcm')) \
        + geom_point(alpha=0.25) \
        + geom_smooth(method='lowess', span=1/5., size=3) \
        + xlab("5' UTR length") \
        + ylab("log(proximal/distal)") \
        + theme(axis_title=element_text(size=20),
                axis_text=element_text(size=20))
print(p)


<ggplot: (8735900809855)>

In [28]:
p = ggplot(_d, aes(x='UTR_length', y='logratio', color='bcm')) \
        + geom_point(alpha=0.25) \
        + geom_smooth(method='ma', level=0.9, window=25, size=7)
print(p)


/home/ilya/src/ggplot/ggplot/utils/smoothers.py:61: FutureWarning: pd.rolling_std is deprecated for Series and will be removed in a future version, replace with 
	Series.rolling(center=False,window=25).std()
  std_err = pd.rolling_std(y, window)
/home/ilya/src/ggplot/ggplot/utils/smoothers.py:62: FutureWarning: pd.rolling_mean is deprecated for Series and will be removed in a future version, replace with 
	Series.rolling(center=False,window=25).mean()
  y = pd.rolling_mean(y, window)
<ggplot: (-9223363300952522012)>

In [29]:
p = ggplot(d, aes(x='UTR_length', y='diff')) \
        + geom_point(alpha=0.25) \
        + geom_smooth(method='lowess', span=1/5., size=4)
print(p)


<ggplot: (-9223363300953983081)>

In [30]:
p = ggplot(d, aes(x='UTR_length', y='diff')) \
        + geom_point(alpha=0.25) \
        + geom_smooth(method='ma', se=False, window=35, size=4)
print(p)


/home/ilya/src/ggplot/ggplot/utils/smoothers.py:61: FutureWarning: pd.rolling_std is deprecated for Series and will be removed in a future version, replace with 
	Series.rolling(center=False,window=35).std()
  std_err = pd.rolling_std(y, window)
/home/ilya/src/ggplot/ggplot/utils/smoothers.py:62: FutureWarning: pd.rolling_mean is deprecated for Series and will be removed in a future version, replace with 
	Series.rolling(center=False,window=35).mean()
  y = pd.rolling_mean(y, window)
<ggplot: (-9223363301062477806)>

Replicate 2


In [31]:
d = dfa[(dfa['UTR_length'] > 80)
        & (dfa['UTR_length'] < 600)
        & (dfa['ratio_ACAGTG'] > 2)
        & (dfa['offset'] == 200)
        & (dfa['win'] == 80)][['TSS', 'gene', 'UTR_length', 'ratio_ACAGTG','ratio_GCCAAT']].copy()
d['log-bcm'] = np.log10(d['ratio_ACAGTG'])
d['log+bcm'] = np.log10(d['ratio_GCCAAT'])
d['loglen'] = np.log10(d['UTR_length'])
d['diff'] = d['log-bcm'] - d['log+bcm']

d1 = d[['UTR_length', 'loglen', 'log-bcm']].rename(columns={'log-bcm': 'logratio'})
d1['bcm'] = '-'
d2 = d[['UTR_length', 'loglen', 'log+bcm']].rename(columns={'log+bcm': 'logratio'})
d2['bcm'] = '+'

_d = pd.concat([d1, d2])

In [32]:
p = ggplot(_d, aes(x='UTR_length', y='logratio', color='bcm')) \
        + geom_point(alpha=0.25) \
        + geom_smooth(method='lowess', span=1/5., size=3) \
        + xlab("5' UTR length") \
        + ylab("log(proximal/distal)") \
        + theme(axis_title=element_text(size=20),
                axis_text=element_text(size=20))
print(p)


<ggplot: (8735901116508)>

In [33]:
p = ggplot(_d, aes(x='UTR_length', y='logratio', color='bcm')) \
        + geom_point(alpha=0.25) \
        + geom_smooth(method='ma', level=0.9, window=25, size=7)
print(p)


/home/ilya/src/ggplot/ggplot/utils/smoothers.py:61: FutureWarning: pd.rolling_std is deprecated for Series and will be removed in a future version, replace with 
	Series.rolling(center=False,window=25).std()
  std_err = pd.rolling_std(y, window)
/home/ilya/src/ggplot/ggplot/utils/smoothers.py:62: FutureWarning: pd.rolling_mean is deprecated for Series and will be removed in a future version, replace with 
	Series.rolling(center=False,window=25).mean()
  y = pd.rolling_mean(y, window)
<ggplot: (8735900995269)>

Short UTRs


In [34]:
d = dfa[(dfa['UTR_length'] < 80)
        & (dfa['UTR_length'] > 0)
        & (dfa['offset'] == 200)
        & (dfa['win'] == 80)][['TSS', 'gene', 'UTR_length', 'ratio_ATCACG','ratio_CGATGT']].copy()
d['log-bcm'] = np.log10(d['ratio_ATCACG'])
d['log+bcm'] = np.log10(d['ratio_CGATGT'])
d['loglen'] = np.log10(d['UTR_length'])
d['diff'] = d['log-bcm'] - d['log+bcm']


d1 = d[['UTR_length', 'loglen', 'log-bcm']].rename(columns={'log-bcm': 'logratio'})
d1['bcm'] = '-'
d2 = d[['UTR_length', 'loglen', 'log+bcm']].rename(columns={'log+bcm': 'logratio'})
d2['bcm'] = '+'

_d = pd.concat([d1, d2])

In [35]:
p = ggplot(_d, aes(x='UTR_length', y='logratio', color='bcm')) \
        + geom_point(alpha=0.25) \
        + geom_smooth(method='lowess', span=1/5., size=7) \
        + scale_y_continuous(limits=(-1,2)) \
        + xlab("5' UTR length") \
        + ylab("log(proximal/distal)") \
        + theme(axis_title=element_text(size=20),
                axis_text=element_text(size=20))
print(p)


<ggplot: (8735792311030)>

In [36]:
p = ggplot(_d, aes(x='UTR_length', y='logratio', color='bcm')) \
        + geom_point(alpha=0.25) \
        + geom_smooth(method='ma', window=25, size=7) \
        + scale_y_continuous(limits=(-1,2))
print(p)


/home/ilya/src/ggplot/ggplot/utils/smoothers.py:61: FutureWarning: pd.rolling_std is deprecated for Series and will be removed in a future version, replace with 
	Series.rolling(center=False,window=25).std()
  std_err = pd.rolling_std(y, window)
/home/ilya/src/ggplot/ggplot/utils/smoothers.py:62: FutureWarning: pd.rolling_mean is deprecated for Series and will be removed in a future version, replace with 
	Series.rolling(center=False,window=25).mean()
  y = pd.rolling_mean(y, window)
<ggplot: (-9223363300953724086)>

In [37]:
p = ggplot(d, aes(x='UTR_length', y='diff')) \
        + geom_point(alpha=0.25) \
        + geom_smooth(method='lowess', span=1/5., size=4)
print(p)


<ggplot: (-9223363300953996691)>

In [38]:
p = ggplot(d, aes(x='UTR_length', y='diff')) \
        + geom_point(alpha=0.25) \
        + geom_smooth(method='ma', se=False, window=35, size=4)
print(p)


/home/ilya/src/ggplot/ggplot/utils/smoothers.py:61: FutureWarning: pd.rolling_std is deprecated for Series and will be removed in a future version, replace with 
	Series.rolling(center=False,window=35).std()
  std_err = pd.rolling_std(y, window)
/home/ilya/src/ggplot/ggplot/utils/smoothers.py:62: FutureWarning: pd.rolling_mean is deprecated for Series and will be removed in a future version, replace with 
	Series.rolling(center=False,window=35).mean()
  y = pd.rolling_mean(y, window)
<ggplot: (-9223363300953878121)>

Replicate 2


In [39]:
d = dfa[(dfa['UTR_length'] < 80)
        & (dfa['UTR_length'] > 0)
        & (dfa['offset'] == 200)
        & (dfa['win'] == 80)][['TSS', 'gene', 'UTR_length', 'ratio_ACAGTG','ratio_GCCAAT']].copy()
d['log-bcm'] = np.log2(d['ratio_ACAGTG'])
d['log+bcm'] = np.log2(d['ratio_GCCAAT'])
d['loglen'] = np.log2(d['UTR_length'])
d['diff'] = d['log-bcm'] - d['log+bcm']

d1 = d[['UTR_length', 'loglen', 'log-bcm']].rename(columns={'log-bcm': 'logratio'})
d1['bcm'] = '-'
d2 = d[['UTR_length', 'loglen', 'log+bcm']].rename(columns={'log+bcm': 'logratio'})
d2['bcm'] = '+'

_d = pd.concat([d1, d2])

In [40]:
p = ggplot(_d, aes(x='UTR_length', y='logratio', color='bcm')) \
        + geom_point(alpha=0.25) \
        + geom_smooth(method='lowess', span=1/5., size=3) \
        + xlab("5' UTR length") \
        + ylab("log(proximal/distal)") \
        + theme(axis_title=element_text(size=20),
                axis_text=element_text(size=20))
print(p)


<ggplot: (-9223363300953763026)>

In [41]:
p = ggplot(_d, aes(x='UTR_length', y='logratio', color='bcm')) \
        + geom_point(alpha=0.25) \
        + geom_smooth(method='ma', window=25, size=7)
print(p)


/home/ilya/src/ggplot/ggplot/utils/smoothers.py:61: FutureWarning: pd.rolling_std is deprecated for Series and will be removed in a future version, replace with 
	Series.rolling(center=False,window=25).std()
  std_err = pd.rolling_std(y, window)
/home/ilya/src/ggplot/ggplot/utils/smoothers.py:62: FutureWarning: pd.rolling_mean is deprecated for Series and will be removed in a future version, replace with 
	Series.rolling(center=False,window=25).mean()
  y = pd.rolling_mean(y, window)
<ggplot: (-9223363300952533195)>

Diff


In [42]:
d = dfa[(dfa['UTR_length'] > 80)
        & (dfa['UTR_length'] < 700)
        & (dfa['offset'] == 200)
        & (dfa['win'] == 80)][['TSS', 'gene', 'UTR_length', 'ratio_ACAGTG','ratio_GCCAAT']].copy()
d['log-bcm'] = np.log10(d['ratio_ACAGTG'])
d['log+bcm'] = np.log10(d['ratio_GCCAAT'])
d['loglen'] = np.log10(d['UTR_length'])
d['diff'] = d['log-bcm'] - d['log+bcm']
d['bcm'] = '-'
d.loc[d['ratio_ACAGTG'] > 2, 'bcm'] = '+'

In [43]:
p = ggplot(d, aes(x='UTR_length', y='diff', color='bcm')) \
        + geom_point(alpha=0.25) \
        + geom_smooth(method='lowess', span=1/5., size=3) \
        + xlab("5' UTR length") \
        + ylab("log(proximal/distal)") \
        + theme(axis_title=element_text(size=20),
                axis_text=element_text(size=20))
print(p)


<ggplot: (8735792256141)>

In [44]:
p = ggplot(d, aes(x='UTR_length', y='diff', color='bcm')) \
        + geom_point(alpha=0.25) \
        + geom_smooth(method='ma', window=50, size=3)
print(p)


/home/ilya/src/ggplot/ggplot/utils/smoothers.py:61: FutureWarning: pd.rolling_std is deprecated for Series and will be removed in a future version, replace with 
	Series.rolling(center=False,window=50).std()
  std_err = pd.rolling_std(y, window)
/home/ilya/src/ggplot/ggplot/utils/smoothers.py:62: FutureWarning: pd.rolling_mean is deprecated for Series and will be removed in a future version, replace with 
	Series.rolling(center=False,window=50).mean()
  y = pd.rolling_mean(y, window)
<ggplot: (8735900714444)>

In [45]:
d = dfa[(dfa['UTR_length'] > 80)
        & (dfa['ratio_ATCACG'] > 2)
        & (dfa['offset'] == 200)
        & (dfa['win'] == 80)][['TSS', 'gene', 'UTR_length', 'ratio_ATCACG','ratio_CGATGT']].copy()
d['log-bcm'] = np.log10(d['ratio_ATCACG'])
d['log+bcm'] = np.log10(d['ratio_CGATGT'])
d['loglen'] = np.log10(d['UTR_length'])

d1 = d[['loglen', 'ratio_ATCACG']].rename(columns={'ratio_ATCACG': 'ratio'})
d1['bcm'] = '-'
d2 = d[['loglen', 'ratio_CGATGT']].rename(columns={'ratio_CGATGT': 'ratio'})
d2['bcm'] = '+'

_d = pd.concat([d1, d2])

In [46]:
p = ggplot(_d, aes(x='loglen', y='ratio', color='bcm')) \
        + geom_point(alpha=0.2) \
        + geom_smooth(method='lowess', span=1/5., size=3)
print(p)


<ggplot: (8735900870201)>

In [47]:
p = ggplot(_d, aes(x='loglen', y='ratio', color='bcm')) \
        + geom_point(alpha=0.2) \
        + geom_smooth(method='ma', window=20, size=5)
print(p)


/home/ilya/src/ggplot/ggplot/utils/smoothers.py:61: FutureWarning: pd.rolling_std is deprecated for Series and will be removed in a future version, replace with 
	Series.rolling(center=False,window=20).std()
  std_err = pd.rolling_std(y, window)
/home/ilya/src/ggplot/ggplot/utils/smoothers.py:62: FutureWarning: pd.rolling_mean is deprecated for Series and will be removed in a future version, replace with 
	Series.rolling(center=False,window=20).mean()
  y = pd.rolling_mean(y, window)
<ggplot: (8735902257037)>

In [48]:
p = ggplot(_d, aes(x='loglen', y='ratio', color='bcm')) \
        + geom_point(alpha=0.3) \
        + scale_y_continuous(limits=(0,10))
print(p)


<ggplot: (-9223363300952535443)>

In [50]:
samples_dict = {
    's9': ['ATCACG', 'ACAGTG'],
    's9+bcm': ['CGATGT', 'GCCAAT'],
    's17': ['TTAGGC', 'GATCAG'],
    }

utr_cols = ['TSS', 'gene', 'UTR_length', 
            'ratio_ATCACG','ratio_CGATGT',
            'ratio_ACAGTG','ratio_GCCAAT']

long_utrs = dfa[(dfa['UTR_length'] > 80)
            & (dfa['UTR_length'] < 600)
            & (dfa['ratio_ACAGTG'] > 2)
            & (dfa['offset'] == 200)
            & (dfa['win'] == 80)][utr_cols].copy()
    

short_utrs = dfa[(dfa['UTR_length'] < 80)
        & (dfa['UTR_length'] > 0)
        & (dfa['offset'] == 200)
        & (dfa['win'] == 80)][utr_cols].copy()


def utr_scatter(data, samples, cond=None, save_csv=False):
    '''
    `samples`: list of sample_ids from samples_dict
    `cond`: list of conditions
    '''
    
    def mark_rho(rec):
        if rec['gene'] == 'rpoS' and rec['UTR_length'] > 500:
            return 'rpoS'
        else:
            return ''

    
    res = []
    for i,sample in enumerate(samples):
        d = data[['UTR_length', 'gene']]
        d['loglen'] = np.log10(data['UTR_length'])
        dtmp = data[['ratio_{}'.format(bc) for bc in samples_dict[sample]]]
        for barcode in samples_dict[sample]:
            dtmp[barcode] = np.log10(data['ratio_{}'.format(barcode)])
        d['logratio'] = dtmp[[bc for bc in samples_dict[sample]]].mean(axis=1)
        if cond and len(cond) == len(samples):
            d['cond'] = cond[i]
        else:
            d['cond'] = 'cond_{}'.format(i)
        d['label'] = d.apply(mark_rho, axis=1)
        res.append(d)
    df = pd.concat(res)
    if save_csv:
        df.to_csv('../../results/redux/fig_1b.df.csv', sep='\t')
    
    p = ggplot(df, aes(x='UTR_length', y='logratio', color='cond', label='label')) \
        + geom_point(alpha=0.25) \
        + geom_text(color="black", nudge_x=20) \
        + geom_smooth(method='lowess', span=1/5., size=3) \
        + xlab("5' UTR length") \
        + ylab("log(proximal/distal)") \
        + theme(axis_title=element_text(size=20),
                axis_text=element_text(size=20))
    print(p)

In [51]:
utr_scatter(long_utrs,  ['s9', 's9+bcm'], cond=['-bcm', '+bcm'], save_csv=True)


/home/ilya/.venv/pydata/lib/python3.4/site-packages/ipykernel/__main__.py:44: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
/home/ilya/.venv/pydata/lib/python3.4/site-packages/ipykernel/__main__.py:47: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
/home/ilya/.venv/pydata/lib/python3.4/site-packages/ipykernel/__main__.py:48: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
/home/ilya/.venv/pydata/lib/python3.4/site-packages/ipykernel/__main__.py:50: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
/home/ilya/.venv/pydata/lib/python3.4/site-packages/ipykernel/__main__.py:53: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
<ggplot: (8735792384958)>

In [73]:
utr_scatter(short_utrs,  ['s9', 's9+bcm'], cond=['-bcm', '+bcm'])


/home/ilya/.venv/pydata3/lib/python3.4/site-packages/ipykernel/__main__.py:44: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
/home/ilya/.venv/pydata3/lib/python3.4/site-packages/ipykernel/__main__.py:47: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
/home/ilya/.venv/pydata3/lib/python3.4/site-packages/ipykernel/__main__.py:48: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
/home/ilya/.venv/pydata3/lib/python3.4/site-packages/ipykernel/__main__.py:50: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
/home/ilya/.venv/pydata3/lib/python3.4/site-packages/ipykernel/__main__.py:53: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
<ggplot: (-9223363267839446482)>

In [78]:
samples_dict = {
    's9': ['ATCACG'],
    's9+bcm': ['CGATGT'],
}

In [79]:
utr_scatter(long_utrs,  ['s9', 's9+bcm'], cond=['-bcm', '+bcm'])


/home/ilya/.venv/pydata3/lib/python3.4/site-packages/ipykernel/__main__.py:44: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
/home/ilya/.venv/pydata3/lib/python3.4/site-packages/ipykernel/__main__.py:47: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
/home/ilya/.venv/pydata3/lib/python3.4/site-packages/ipykernel/__main__.py:48: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
/home/ilya/.venv/pydata3/lib/python3.4/site-packages/ipykernel/__main__.py:50: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
/home/ilya/.venv/pydata3/lib/python3.4/site-packages/ipykernel/__main__.py:53: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
<ggplot: (8768893204565)>

In [80]:
utr_scatter(short_utrs,  ['s9', 's9+bcm'], cond=['-bcm', '+bcm'])


/home/ilya/.venv/pydata3/lib/python3.4/site-packages/ipykernel/__main__.py:44: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
/home/ilya/.venv/pydata3/lib/python3.4/site-packages/ipykernel/__main__.py:47: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
/home/ilya/.venv/pydata3/lib/python3.4/site-packages/ipykernel/__main__.py:48: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
/home/ilya/.venv/pydata3/lib/python3.4/site-packages/ipykernel/__main__.py:50: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
/home/ilya/.venv/pydata3/lib/python3.4/site-packages/ipykernel/__main__.py:53: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
<ggplot: (8769015399081)>

In [81]:
samples_dict = {
    's9': ['ACAGTG'],
    's9+bcm': ['GCCAAT'],
}

In [82]:
utr_scatter(long_utrs,  ['s9', 's9+bcm'], cond=['-bcm', '+bcm'])


/home/ilya/.venv/pydata3/lib/python3.4/site-packages/ipykernel/__main__.py:44: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
/home/ilya/.venv/pydata3/lib/python3.4/site-packages/ipykernel/__main__.py:47: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
/home/ilya/.venv/pydata3/lib/python3.4/site-packages/ipykernel/__main__.py:48: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
/home/ilya/.venv/pydata3/lib/python3.4/site-packages/ipykernel/__main__.py:50: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
/home/ilya/.venv/pydata3/lib/python3.4/site-packages/ipykernel/__main__.py:53: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
<ggplot: (8768893257564)>

In [83]:
utr_scatter(short_utrs,  ['s9', 's9+bcm'], cond=['-bcm', '+bcm'])


/home/ilya/.venv/pydata3/lib/python3.4/site-packages/ipykernel/__main__.py:44: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
/home/ilya/.venv/pydata3/lib/python3.4/site-packages/ipykernel/__main__.py:47: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
/home/ilya/.venv/pydata3/lib/python3.4/site-packages/ipykernel/__main__.py:48: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
/home/ilya/.venv/pydata3/lib/python3.4/site-packages/ipykernel/__main__.py:50: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
/home/ilya/.venv/pydata3/lib/python3.4/site-packages/ipykernel/__main__.py:53: SettingWithCopyWarning: 
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
<ggplot: (-9223363267971729092)>

In [ ]: