Maren Equations Summary

This notebook is just pulling out the important figures and tables for the manuscript. For more detailed explanations and exploring see other notebooks.



In [1]:

    
# Set-up default environment
%run '../ipython_startup.py'

# Import additional libraries
import sas7bdat as sas
import cPickle as pickle
import statsmodels.formula.api as smf

from ase_cisEq import marenEq
from ase_cisEq import marenPrintTable

from ase_normalization import meanCenter
from ase_normalization import q3Norm
from ase_normalization import meanStd

pjoin = os.path.join









    



Importing commonly used libraries: 
            os, sys 
            numpy as np 
            scipy as sp 
            pandas as pd 
            matplotlib as mp 
            matplotlib.pyplot as plt
            datetime as dt 
            mclib_Python/flagging as fg

Creating project level variables: 
        MCLAB = /home/jfear/mclab 
        PROJ = /home/jfear/mclab/cegs_ase_paper 
        TODAY = 20150929

Adding ['scripts/mclib_Python', 'scripts/ase_Python'] to PYTHONPATH

Import clean data set

This data set was created by: ase_summarize_ase_filters.sas

The data has had the following droped:

regions that were always bias in 100 genome simulation
regions with APN $\le 25$
regions not in at least 10% of genotypes
regions not in mated and virgin
genotypes with extreme bias in median(q5_mean_theta)
genotypes with $\le500$ regions



In [2]:

    
# Import clean dataset
with sas.SAS7BDAT(pjoin(PROJ, 'sas_data/clean_ase_stack.sas7bdat')) as FH:
    df = FH.to_data_frame()
    
dfClean = df[['line', 'mating_status', 'fusion_id', 'flag_AI_combined', 'q5_mean_theta', 'sum_both', 'sum_line', 'sum_tester', 'sum_total', 'mean_apn']]
print 'Rows ' + str(dfClean.shape[0])
print 'Columns ' + str(dfClean.shape[1])
print 'Number of Genotypes ' + str(len(set(dfClean['line'].tolist())))
print 'Number of Exonic Regions ' + str(len(set(dfClean['fusion_id'].tolist())))









    



[clean_ase_stack.sas7bdat] header length 65536 != 8192
WARNING:/home/jfear/mclab/cegs_ase_paper/sas_data/clean_ase_stack.sas7bdat:[clean_ase_stack.sas7bdat] header length 65536 != 8192






    



Rows 159934
Columns 10
Number of Genotypes 49
Number of Exonic Regions 5391

Additional cleaning

For the maren equations, I am also going to drop exonic regions with less than 10 genotypes. The maren equations make some assumptions about the population level sums. Obvisouly the more genotypes that are present for each fusions the better, but I am comfortable with as few as 10 genotypes.



In [3]:

    
# Drop groups with less than 10 lines per fusion
grp = dfClean.groupby(['mating_status', 'fusion_id'])
dfGt10 = grp.filter(lambda x: x['line'].count() >= 10).copy()
print 'Rows ' + str(dfGt10.shape[0])
print 'Columns ' + str(dfGt10.shape[1])
print 'Number of Genotypes ' + str(len(set(dfGt10['line'].tolist())))
print 'Number of Exonic Regions ' + str(len(set(dfGt10['fusion_id'].tolist())))









    



Rows 131700
Columns 10
Number of Genotypes 49
Number of Exonic Regions 3053

Raw Counts



In [4]:

    
# Calculate Maren TIG equations by mating status and exonic region
marenRawCounts = marenEq(dfGt10, Eii='sum_line', Eti='sum_tester', group=['mating_status', 'fusion_id'])
marenRawCounts['mag_cis'] = abs(marenRawCounts['cis_line'])
marenRawCounts.columns









    Out[4]:





Index([            u'line',    u'mating_status',        u'fusion_id',
       u'flag_AI_combined',    u'q5_mean_theta',         u'sum_both',
               u'sum_line',       u'sum_tester',        u'sum_total',
               u'mean_apn',               u'mu',         u'cis_line',
             u'trans_line',       u'cis_tester',     u'trans_tester',
                u'mag_cis'],
      dtype='object')

Plot Distribution of cis- and trans-effects



In [5]:

    
# Plot densities
def panelKde(df, **kwargs):
    options = {'subplots': True, 
               'layout': (7, 7), 
               'figsize': (20, 20), 
               'xlim': (-500, 500), 
               'legend': False,
               'color': 'k'}
    options.update(kwargs)
    
    # Make plot
    axes = df.plot(kind='kde', **options)
    
    # Add titles intead of legends
    try:
        for ax in axes.ravel():
            h, l = ax.get_legend_handles_labels()
            ax.set_title(l[0])
            ax.get_yaxis().set_visible(False)
            ax.axvline(0, lw=1)
    except:
        ax = axes
        ax.get_yaxis().set_visible(False)
        ax.axvline(0, lw=1)
    
    return plt.gcf()
    
def linePanel(df, value='cis_line', index='fusion_id', columns='line'):
    
    mymap = {
        'cis_line': 'cis-Line Effects',
        'trans_line': 'trans-Line Effects',
        'line': 'genotype',
        'fusion_id': 'exonic_region'
    }
    
    # Iterate over mated and virgin
    for k, v in {'M': 'Mated', 'V': 'Virgin'}.iteritems():
        
        # Pivot data frame so that the thing you want to make panels by is in columns.
        dfPiv = pd.pivot_table(df[df['mating_status'] == k], 
                               values=value, index=index, columns=columns)
        
        # Generate panel plot with at most 49 panels
        if value == 'cis_line':
            xlim = (-500, 500)
        else:
            # trans-effects appear to be larger in magnitude
            xlim = (-1500, 1500)
            
        fig = panelKde(dfPiv.iloc[:, :49], xlim=xlim)
        
        title = '{}\n{}'.format(mymap[value], v)
        fig.suptitle(title, fontsize=18, fontweight='bold')
        
        fname = pjoin(PROJ, 'pipeline_output/cis_effects/density_plot_by_{}_{}_{}.png'.format(mymap[columns], value, v.lower()))
        plt.savefig(fname, bbox_inches='tight')
        print("Saved figure to: " + fname)
        
        plt.close()
        
def testerPanel(df, value='cis_tester'):
    mymap = {
        'cis_tester': 'cis-Tester Effects',
        'trans_tester': 'trans-Trans Effects'
    }
    
    # Iterate over mated and virgin
    for k, v in {'M': 'Mated', 'V': 'Virgin'}.iteritems():
        
        # Split table by mating status and drop duplicates, because 
        # there is only one tester value for each exonic region
        dfSub = df.ix[df['mating_status'] == k,['fusion_id', value]].drop_duplicates()
        
        # Generate Panel Plot
        fig = panelKde(dfSub, subplots=False)
        
        title = '{}\n{}'.format(mymap[value], v)
        fig.suptitle(title, fontsize=18, fontweight='bold')
        
        fname = pjoin(PROJ, 'pipeline_output/cis_effects/density_plot_{}_{}.png'.format(value, v.lower()))
        plt.savefig(fname, bbox_inches='tight')
        print("Saved figure to: " + fname)
        
        plt.close()



In [7]:

    
# Cis and trans line effects by genotype
linePanel(marenRawCounts, value='cis_line', index='fusion_id', columns='line')
linePanel(marenRawCounts, value='trans_line', index='fusion_id', columns='line')

# Cis and trans line effects by exonic region
linePanel(marenRawCounts, value='cis_line', index='line', columns='fusion_id')
linePanel(marenRawCounts, value='trans_line', index='line', columns='fusion_id')

# Cis and trans tester effects
testerPanel(marenRawCounts, value='cis_tester')
testerPanel(marenRawCounts, value='trans_tester')









    



Saved figure to: /home/jfear/mclab/cegs_ase_paper/pipeline_output/cis_effects/density_plot_by_genotype_cis_line_mated.png
Saved figure to: /home/jfear/mclab/cegs_ase_paper/pipeline_output/cis_effects/density_plot_by_genotype_cis_line_virgin.png
Saved figure to: /home/jfear/mclab/cegs_ase_paper/pipeline_output/cis_effects/density_plot_by_genotype_trans_line_mated.png
Saved figure to: /home/jfear/mclab/cegs_ase_paper/pipeline_output/cis_effects/density_plot_by_genotype_trans_line_virgin.png
Saved figure to: /home/jfear/mclab/cegs_ase_paper/pipeline_output/cis_effects/density_plot_by_exonic_region_cis_line_mated.png
Saved figure to: /home/jfear/mclab/cegs_ase_paper/pipeline_output/cis_effects/density_plot_by_exonic_region_cis_line_virgin.png
Saved figure to: /home/jfear/mclab/cegs_ase_paper/pipeline_output/cis_effects/density_plot_by_exonic_region_trans_line_mated.png
Saved figure to: /home/jfear/mclab/cegs_ase_paper/pipeline_output/cis_effects/density_plot_by_exonic_region_trans_line_virgin.png
Saved figure to: /home/jfear/mclab/cegs_ase_paper/pipeline_output/cis_effects/density_plot_cis_tester_mated.png
Saved figure to: /home/jfear/mclab/cegs_ase_paper/pipeline_output/cis_effects/density_plot_cis_tester_virgin.png
Saved figure to: /home/jfear/mclab/cegs_ase_paper/pipeline_output/cis_effects/density_plot_trans_tester_mated.png
Saved figure to: /home/jfear/mclab/cegs_ase_paper/pipeline_output/cis_effects/density_plot_trans_tester_virgin.png

Plot cis- and trans-effects vs Allelic Proportion



In [14]:

    
# Set Globals
SHAPES = {'M': 'o', 'V': '^'}
CMAP='jet'

# Add color column to color by genotype
colors = {}
cnt = 0
genos = set(dfGt10['line'].tolist())
for l in genos:
    colors[l] = cnt
    cnt += 1

marenRawCounts['color'] = marenRawCounts['line'].map(colors)



In [15]:

    
# Plotting scatter
def getR2(df, x, y):
    """ Calculate the R-squared using OLS with an intercept """
    formula = '{} ~ {} + 1'.format(y, x)
    return smf.ols(formula, df).fit().rsquared

def scatPlt(df, x, y, c=None, cmap='jet', s=50, marker='o', ax=None, title=None, xlab=None, ylab=None, diag='pos'):
    """ Make a scatter plot using some default options """
    
    ax = df.plot(x, y, kind='scatter', ax=ax, c=c, cmap=cmap, s=s, marker=marker, title=title, colorbar=False)
    
    # Add a diag line
    if diag == 'neg':
        # draw a diag line with negative slope
        ax.plot([0, 1], [1, 0], transform=ax.transAxes)
    elif diag == 'pos':
        # draw a diag line with positive slope
        ax.plot([0, 1], [0, 1], transform=ax.transAxes)
        
    ax.set_xlabel(xlab)
    ax.set_ylabel(ylab)
    
    return ax

def scatPltPanel(df, line='sum_line', tester='sum_tester', x='cis_line', y='prop', cmap='jet', s=60, panel_title=None, diag='pos'):
    """ Make a panel of scatter plots using pandas """
    # Plot the cis-line effects x proportion Line by fusion
    df['prop'] = 1 - df[tester] / (df[line] + df[tester])
    
    # Create 5x5 panel plot
    fig, axes = plt.subplots(5, 5, figsize=(20, 20))
    fig.suptitle(panel_title, fontsize=12, fontweight='bold')
    axes = axes.ravel()

    # Group by fusion_id
    for i, (n, gdf) in enumerate(df.groupby('fusion_id')):
        ax = axes[i]
                    
        # Calculate R-squared value
        r2 = getR2(gdf, x, y)

        # Make new title with R-squared in it
        t = '{}\nR^2: {}'.format(n, round(r2, 3))

        # Change marker style based on mating status and plot
        for ms, msdf in gdf.groupby('mating_status'):
            scatPlt(msdf, x, y, c='color', cmap=cmap, s=s, marker=SHAPES[ms], ax=ax, title=t, xlab=x, ylab=y, diag=diag)

        # only plot 25 fusions
        if i == 24:
            break
    
    fname = pjoin(PROJ, 'pipeline_output/cis_effects/scatter_plot_by_exonic_region_{}_v_{}.png'.format(x, y))
    plt.savefig(fname, bbox_inches='tight')
    print("Saved figure to: " + fname)
    
    plt.close()



In [16]:

    
# Plot the cis-line effects x proportion by fusion
scatPltPanel(marenRawCounts, line='sum_line', tester='sum_tester', cmap=CMAP, panel_title='Raw Counts: cis-line')









    



Saved figure to: /home/jfear/mclab/cegs_ase_paper/pipeline_output/cis_effects/scatter_plot_by_exonic_region_cis_line_v_prop.png



In [17]:

    
# Plot the trans-line effects x proportion by fusion
scatPltPanel(marenRawCounts, line='sum_line', tester='sum_tester', x='trans_line', cmap=CMAP, panel_title='Raw Counts: trans-line', diag='neg')









    



Saved figure to: /home/jfear/mclab/cegs_ase_paper/pipeline_output/cis_effects/scatter_plot_by_exonic_region_trans_line_v_prop.png

Plot cis- and trans-effects vs Allelic Proportion for Specific Exonic Regions



In [18]:

    
# Plot F10005_SI
FUSION='F10005_SI'

dfFus = marenRawCounts[marenRawCounts['fusion_id'] == FUSION].copy()
dfFus['prop'] = 1 - dfFus['sum_tester'] / (dfFus['sum_line'] + dfFus['sum_tester'])

# Generate 3 panel plot
fig, axes = plt.subplots(1, 3, figsize=(12, 4))
fig.suptitle(FUSION, fontsize=14, fontweight='bold')

for n, mdf in dfFus.groupby('mating_status'):
    # Plot the cis-line effects x proportion by fusion
    scatPlt(mdf, x='cis_line', y='prop', ax=axes[0], c='color', cmap=CMAP, marker=SHAPES[n], title='cis-line', xlab='cis-line', ylab='prop')

    # Plot the trans-line effects x proportion by fusion
    scatPlt(mdf, x='trans_line', y='prop', ax=axes[1], c='color', cmap=CMAP, marker=SHAPES[n], title='trans-line', xlab='trans-line', ylab='prop', diag='neg')

    # Plot the Tester effects x proportion by fusion
    scatPlt(mdf, x='cis_tester', y='prop', ax=axes[2], c='color', cmap=CMAP,  marker=SHAPES[n], title='Tester', xlab='cis-tester', ylab='prop', diag=None)

fname = pjoin(PROJ, 'pipeline_output/cis_effects/scatter_plot_{}_effects_v_prop.png'.format(FUSION))
plt.savefig(fname, bbox_inches='tight')
print("Saved figure to: " + fname)

plt.close()









    



Saved figure to: /home/jfear/mclab/cegs_ase_paper/pipeline_output/cis_effects/scatter_plot_F10005_SI_effects_v_prop.png

F10317_SI

This fusion has a weaker cis-line effects but trans-line effects look more linear.



In [19]:

    
# Plot F10317_SI
FUSION='F10317_SI'

dfFus = marenRawCounts[marenRawCounts['fusion_id'] == FUSION].copy()
dfFus['prop'] = 1 - dfFus['sum_tester'] / (dfFus['sum_line'] + dfFus['sum_tester'])

# Generate 3 panel plot
fig, axes = plt.subplots(1, 3, figsize=(12, 4))
fig.suptitle(FUSION, fontsize=14, fontweight='bold')

for n, mdf in dfFus.groupby('mating_status'):
    # Plot the cis-line effects x proportion by fusion
    scatPlt(mdf, x='cis_line', y='prop', ax=axes[0], c='color', cmap=CMAP, marker=SHAPES[n], title='cis-line', xlab='cis-line', ylab='prop')

    # Plot the trans-line effects x proportion by fusion
    scatPlt(mdf, x='trans_line', y='prop', ax=axes[1], c='color', cmap=CMAP, marker=SHAPES[n], title='trans-line', xlab='trans-line', ylab='prop', diag='neg')

    # Plot the Tester effects x proportion by fusion
    scatPlt(mdf, x='cis_tester', y='prop', ax=axes[2], c='color', cmap=CMAP,  marker=SHAPES[n], title='Tester', xlab='cis-tester', ylab='prop', diag=None)

fname = pjoin(PROJ, 'pipeline_output/cis_effects/scatter_plot_{}_effects_v_prop.png'.format(FUSION))
plt.savefig(fname, bbox_inches='tight')
print("Saved figure to: " + fname)

plt.close()









    



Saved figure to: /home/jfear/mclab/cegs_ase_paper/pipeline_output/cis_effects/scatter_plot_F10317_SI_effects_v_prop.png

F10482_SI

This fusion has a weaker cis-line effects but trans-line effects look more linear.



In [20]:

    
# Plot F10482_SI
FUSION='F10482_SI'

dfFus = marenRawCounts[marenRawCounts['fusion_id'] == FUSION].copy()
dfFus['prop'] = 1 - dfFus['sum_tester'] / (dfFus['sum_line'] + dfFus['sum_tester'])

# Generate 3 panel plot
fig, axes = plt.subplots(1, 3, figsize=(12, 4))
fig.suptitle(FUSION, fontsize=14, fontweight='bold')

for n, mdf in dfFus.groupby('mating_status'):
    # Plot the cis-line effects x proportion by fusion
    scatPlt(mdf, x='cis_line', y='prop', ax=axes[0], c='color', cmap=CMAP, marker=SHAPES[n], title='cis-line', xlab='cis-line', ylab='prop')

    # Plot the trans-line effects x proportion by fusion
    scatPlt(mdf, x='trans_line', y='prop', ax=axes[1], c='color', cmap=CMAP, marker=SHAPES[n], title='trans-line', xlab='trans-line', ylab='prop', diag='neg')

    # Plot the Tester effects x proportion by fusion
    scatPlt(mdf, x='cis_tester', y='prop', ax=axes[2], c='color', cmap=CMAP,  marker=SHAPES[n], title='Tester', xlab='cis-tester', ylab='prop', diag=None)

fname = pjoin(PROJ, 'pipeline_output/cis_effects/scatter_plot_{}_effects_v_prop.png'.format(FUSION))
plt.savefig(fname, bbox_inches='tight')
print("Saved figure to: " + fname)

plt.close()









    



Saved figure to: /home/jfear/mclab/cegs_ase_paper/pipeline_output/cis_effects/scatter_plot_F10482_SI_effects_v_prop.png



In [71]:

    
f1005









    Out[71]:






  
    
      
      line
      mating_status
      fusion_id
      flag_AI_combined
      q5_mean_theta
      sum_both
      sum_line
      sum_tester
      sum_total
      mean_apn
      mu
      cis_line
      trans_line
      cis_tester
      trans_tester
      mag_cis
    
  
  
    
      10
      r101
      M
      F10005_SI
      0
      0.475
      1274
      155
      133
      1562
      29.523922
      353.733333
      28.666667
      -448.133333
      6.666667
      -6.666667
      28.666667
    
    
      11
      r280
      M
      F10005_SI
      0
      0.545
      1208
      126
      143
      1477
      27.917307
      353.733333
      -10.333333
      -428.133333
      6.666667
      -6.666667
      10.333333
    
    
      12
      r315
      M
      F10005_SI
      0
      0.490
      1231
      237
      218
      1686
      31.867690
      353.733333
      25.666667
      -278.133333
      6.666667
      -6.666667
      25.666667
    
    
      13
      r324
      M
      F10005_SI
      1
      0.450
      2554
      349
      276
      3179
      60.087419
      353.733333
      79.666667
      -162.133333
      6.666667
      -6.666667
      79.666667
    
    
      14
      r335
      M
      F10005_SI
      0
      0.514
      1339
      215
      217
      1771
      33.474306
      353.733333
      4.666667
      -280.133333
      6.666667
      -6.666667
      4.666667
    
    
      15
      r340
      M
      F10005_SI
      1
      0.241
      1897
      332
      102
      2331
      44.059067
      353.733333
      236.666667
      -510.133333
      6.666667
      -6.666667
      236.666667
    
    
      16
      r357
      M
      F10005_SI
      1
      0.542
      2797
      358
      408
      3563
      67.345540
      353.733333
      -43.333333
      101.866667
      6.666667
      -6.666667
      43.333333
    
    
      17
      r358
      M
      F10005_SI
      0
      0.480
      1494
      185
      162
      1841
      34.797401
      353.733333
      29.666667
      -390.133333
      6.666667
      -6.666667
      29.666667
    
    
      18
      r365
      M
      F10005_SI
      0
      0.508
      2313
      385
      386
      3084
      58.291790
      353.733333
      5.666667
      57.866667
      6.666667
      -6.666667
      5.666667
    
    
      19
      r373
      M
      F10005_SI
      0
      0.450
      1546
      146
      113
      1805
      34.116952
      353.733333
      39.666667
      -488.133333
      6.666667
      -6.666667
      39.666667
    
    
      20
      r374
      M
      F10005_SI
      0
      0.412
      1703
      231
      155
      2089
      39.484938
      353.733333
      82.666667
      -404.133333
      6.666667
      -6.666667
      82.666667
    
    
      21
      r380
      M
      F10005_SI
      0
      0.472
      1584
      295
      254
      2133
      40.316598
      353.733333
      47.666667
      -206.133333
      6.666667
      -6.666667
      47.666667
    
    
      22
      r427
      M
      F10005_SI
      0
      0.501
      4246
      522
      510
      5278
      99.761370
      353.733333
      18.666667
      305.866667
      6.666667
      -6.666667
      18.666667
    
    
      23
      r491
      M
      F10005_SI
      0
      0.484
      3827
      657
      599
      5083
      96.075605
      353.733333
      64.666667
      483.866667
      6.666667
      -6.666667
      64.666667
    
    
      24
      r517
      M
      F10005_SI
      0
      0.480
      2591
      580
      520
      3691
      69.764914
      353.733333
      66.666667
      325.866667
      6.666667
      -6.666667
      66.666667
    
    
      25
      r732
      M
      F10005_SI
      0
      0.471
      1281
      276
      237
      1794
      33.909037
      353.733333
      45.666667
      -240.133333
      6.666667
      -6.666667
      45.666667
    
    
      26
      r737
      M
      F10005_SI
      0
      0.566
      1576
      126
      156
      1858
      35.118724
      353.733333
      -23.333333
      -402.133333
      6.666667
      -6.666667
      23.333333
    
    
      27
      r799
      M
      F10005_SI
      0
      0.487
      2798
      520
      480
      3798
      71.787360
      353.733333
      46.666667
      245.866667
      6.666667
      -6.666667
      46.666667
    
    
      28
      r820
      M
      F10005_SI
      0
      0.442
      1880
      236
      180
      2296
      43.397519
      353.733333
      62.666667
      -354.133333
      6.666667
      -6.666667
      62.666667
    
    
      29
      r85
      M
      F10005_SI
      0
      0.606
      1086
      131
      191
      1408
      26.613113
      353.733333
      -53.333333
      -332.133333
      6.666667
      -6.666667
      53.333333
    
    
      30
      w114
      M
      F10005_SI
      0
      0.524
      1042
      230
      243
      1515
      28.635558
      353.733333
      -6.333333
      -228.133333
      6.666667
      -6.666667
      6.333333
    
    
      31
      w38
      M
      F10005_SI
      1
      0.537
      2046
      444
      501
      2991
      56.533963
      353.733333
      -50.333333
      287.866667
      6.666667
      -6.666667
      50.333333
    
    
      32
      w47
      M
      F10005_SI
      0
      0.504
      5492
      1425
      1417
      8334
      157.523922
      353.733333
      14.666667
      2119.866667
      6.666667
      -6.666667
      14.666667
    
    
      33
      w52
      M
      F10005_SI
      0
      0.501
      3848
      407
      397
      4652
      87.929120
      353.733333
      16.666667
      79.866667
      6.666667
      -6.666667
      16.666667
    
    
      34
      w55
      M
      F10005_SI
      1
      0.784
      4191
      252
      882
      5325
      100.649734
      353.733333
      -623.333333
      1049.866667
      6.666667
      -6.666667
      623.333333
    
    
      35
      w59
      M
      F10005_SI
      0
      0.481
      1429
      367
      329
      2125
      40.165387
      353.733333
      44.666667
      -56.133333
      6.666667
      -6.666667
      44.666667
    
    
      36
      w64
      M
      F10005_SI
      0
      0.467
      2153
      284
      239
      2676
      50.580035
      353.733333
      51.666667
      -236.133333
      6.666667
      -6.666667
      51.666667
    
    
      37
      w68
      M
      F10005_SI
      0
      0.460
      1915
      371
      305
      2591
      48.973420
      353.733333
      72.666667
      -104.133333
      6.666667
      -6.666667
      72.666667
    
    
      38
      w76
      M
      F10005_SI
      0
      0.578
      1757
      286
      376
      2419
      45.722386
      353.733333
      -83.333333
      37.866667
      6.666667
      -6.666667
      83.333333
    
    
      39
      w79
      M
      F10005_SI
      0
      0.611
      2596
      384
      583
      3563
      67.345540
      353.733333
      -192.333333
      451.866667
      6.666667
      -6.666667
      192.333333
    
    
      65860
      r101
      V
      F10005_SI
      1
      0.461
      4444
      454
      377
      5275
      99.704666
      370.816667
      74.433333
      14.933333
      -2.566667
      2.566667
      74.433333
    
    
      65861
      r280
      V
      F10005_SI
      0
      0.481
      1472
      236
      210
      1918
      36.252806
      370.816667
      23.433333
      -319.066667
      -2.566667
      2.566667
      23.433333
    
    
      65862
      r315
      V
      F10005_SI
      1
      0.436
      2754
      674
      508
      3936
      74.395747
      370.816667
      163.433333
      276.933333
      -2.566667
      2.566667
      163.433333
    
    
      65863
      r324
      V
      F10005_SI
      0
      0.441
      2473
      331
      252
      3056
      57.762552
      370.816667
      76.433333
      -235.066667
      -2.566667
      2.566667
      76.433333
    
    
      65864
      r335
      V
      F10005_SI
      0
      0.498
      1359
      185
      175
      1719
      32.491435
      370.816667
      7.433333
      -389.066667
      -2.566667
      2.566667
      7.433333
    
    
      65865
      r340
      V
      F10005_SI
      0
      0.511
      1661
      166
      165
      1992
      37.651506
      370.816667
      -1.566667
      -409.066667
      -2.566667
      2.566667
      1.566667
    
    
      65866
      r357
      V
      F10005_SI
      0
      0.520
      2373
      322
      335
      3030
      57.271116
      370.816667
      -15.566667
      -69.066667
      -2.566667
      2.566667
      15.566667
    
    
      65867
      r358
      V
      F10005_SI
      0
      0.526
      2075
      238
      252
      2565
      48.481985
      370.816667
      -16.566667
      -235.066667
      -2.566667
      2.566667
      16.566667
    
    
      65868
      r365
      V
      F10005_SI
      0
      0.500
      3119
      489
      474
      4082
      77.155346
      370.816667
      12.433333
      208.933333
      -2.566667
      2.566667
      12.433333
    
    
      65869
      r373
      V
      F10005_SI
      0
      0.426
      3498
      371
      266
      4135
      78.157118
      370.816667
      102.433333
      -207.066667
      -2.566667
      2.566667
      102.433333
    
    
      65870
      r374
      V
      F10005_SI
      0
      0.527
      2930
      401
      433
      3764
      71.144714
      370.816667
      -34.566667
      126.933333
      -2.566667
      2.566667
      34.566667
    
    
      65871
      r380
      V
      F10005_SI
      0
      0.477
      1724
      371
      327
      2422
      45.779090
      370.816667
      41.433333
      -85.066667
      -2.566667
      2.566667
      41.433333
    
    
      65872
      r427
      V
      F10005_SI
      0
      0.479
      3023
      367
      326
      3716
      70.237448
      370.816667
      38.433333
      -87.066667
      -2.566667
      2.566667
      38.433333
    
    
      65873
      r491
      V
      F10005_SI
      0
      0.517
      2550
      448
      465
      3463
      65.455405
      370.816667
      -19.566667
      190.933333
      -2.566667
      2.566667
      19.566667
    
    
      65874
      r517
      V
      F10005_SI
      0
      0.425
      1393
      363
      258
      2014
      38.067336
      370.816667
      102.433333
      -223.066667
      -2.566667
      2.566667
      102.433333
    
    
      65875
      r732
      V
      F10005_SI
      0
      0.532
      1126
      186
      202
      1514
      28.616657
      370.816667
      -18.566667
      -335.066667
      -2.566667
      2.566667
      18.566667
    
    
      65876
      r737
      V
      F10005_SI
      0
      0.457
      1353
      148
      118
      1619
      30.601299
      370.816667
      27.433333
      -503.066667
      -2.566667
      2.566667
      27.433333
    
    
      65877
      r799
      V
      F10005_SI
      0
      0.488
      1609
      272
      250
      2131
      40.278795
      370.816667
      19.433333
      -239.066667
      -2.566667
      2.566667
      19.433333
    
    
      65878
      r820
      V
      F10005_SI
      0
      0.506
      2337
      248
      245
      2830
      53.490845
      370.816667
      0.433333
      -249.066667
      -2.566667
      2.566667
      0.433333
    
    
      65879
      r85
      V
      F10005_SI
      0
      0.562
      1859
      234
      289
      2382
      45.023036
      370.816667
      -57.566667
      -161.066667
      -2.566667
      2.566667
      57.566667
    
    
      65880
      w114
      V
      F10005_SI
      0
      0.397
      1095
      472
      300
      1867
      35.288836
      370.816667
      169.433333
      -139.066667
      -2.566667
      2.566667
      169.433333
    
    
      65881
      w38
      V
      F10005_SI
      0
      0.514
      3793
      704
      727
      5224
      98.740697
      370.816667
      -25.566667
      714.933333
      -2.566667
      2.566667
      25.566667
    
    
      65882
      w47
      V
      F10005_SI
      0
      0.495
      2971
      772
      737
      4480
      84.678086
      370.816667
      32.433333
      734.933333
      -2.566667
      2.566667
      32.433333
    
    
      65883
      w52
      V
      F10005_SI
      0
      0.495
      2136
      225
      212
      2573
      48.633196
      370.816667
      10.433333
      -315.066667
      -2.566667
      2.566667
      10.433333
    
    
      65884
      w55
      V
      F10005_SI
      1
      0.773
      3809
      228
      750
      4787
      90.480803
      370.816667
      -524.566667
      760.933333
      -2.566667
      2.566667
      524.566667
    
    
      65885
      w59
      V
      F10005_SI
      0
      0.567
      1531
      329
      417
      2277
      43.038393
      370.816667
      -90.566667
      94.933333
      -2.566667
      2.566667
      90.566667
    
    
      65886
      w64
      V
      F10005_SI
      0
      0.486
      3494
      423
      387
      4304
      81.351447
      370.816667
      33.433333
      34.933333
      -2.566667
      2.566667
      33.433333
    
    
      65887
      w68
      V
      F10005_SI
      1
      0.438
      2826
      639
      487
      3952
      74.698169
      370.816667
      149.433333
      234.933333
      -2.566667
      2.566667
      149.433333
    
    
      65888
      w76
      V
      F10005_SI
      0
      0.514
      2561
      475
      488
      3524
      66.608387
      370.816667
      -15.566667
      236.933333
      -2.566667
      2.566667
      15.566667
    
    
      65889
      w79
      V
      F10005_SI
      1
      0.633
      2754
      392
      654
      3800
      71.825162
      370.816667
      -264.566667
      568.933333
      -2.566667
      2.566667
      264.566667



In [ ]:



In [ ]:



In [17]:

    
marenRawCounts.columns









    Out[17]:





Index([            u'line',    u'mating_status',        u'fusion_id',
       u'flag_AI_combined',    u'q5_mean_theta',         u'sum_both',
               u'sum_line',       u'sum_tester',        u'sum_total',
               u'mean_apn',               u'mu',         u'cis_line',
             u'trans_line',       u'cis_tester',     u'trans_tester',
                u'mag_cis'],
      dtype='object')



In [15]:

    
meanByMsLine = marenRawCounts[['mean_apn', 'cis_line', 'mating_status', 'line']].groupby(['mating_status', 'line']).mean()
meanByMsLine.columns









    Out[15]:





Index([u'mean_apn', u'cis_line'], dtype='object')



In [60]:

    
meanByMsLine.plot(kind='scatter', x='mean_apn', y='cis_line')









    Out[60]:





<matplotlib.axes._subplots.AxesSubplot at 0x7f2b4a987450>



In [61]:



In [68]:

    
def cisAPN(df, fusion, value='cis_line', xcutoff='>=150', ycutoff='<=-180'):
    """ Plot effects vs mean apn"""
    # Pull out fusion of interest
    dfSub = marenRawCounts[marenRawCounts['fusion_id'] == fusion]

    # Make scatter plot
    fig, ax = plt.subplots(1, 1, figsize=(10, 10))
    dfSub.plot(kind='scatter', x='mean_apn', y='cis_line', ax=ax, title=fusion)

    # Annotate outliers
    xc = 
    filt = dfSub.loc[(dfSub[value] eval(ycutoff)) | (dfSub['mean_apn'] eval(xcutoff)), ['line', 'mating_status', 'mean_apn', 'cis_line']]
    for row in filt.values:
        line, ms, apn, cis = row
        ax.annotate(line + '_' + ms, xy=(apn, cis))

    fname = pjoin(PROJ, 'pipeline_output/cis_effects/scatter_plot_{}_{}_v_meanApn.png'.format(fusion, value))
    plt.savefig(fname, bbox_inches='tight')









    



  File "<ipython-input-68-ab89754b1cf2>", line 11
    filt = dfSub.loc[(dfSub[value] eval(ycutoff)) | (dfSub['mean_apn'] eval(xcutoff)), ['line', 'mating_status', 'mean_apn', 'cis_line']]
                                      ^
SyntaxError: invalid syntax



In [70]:

    
eval("{} == 'M'".format(marenRawCounts['mating_status']))









    



  File "<string>", line 1
    0         M
              ^
SyntaxError: invalid syntax



In [ ]:

	line	mating_status	fusion_id	flag_AI_combined	q5_mean_theta	sum_both	sum_line	sum_tester	sum_total	mean_apn	mu	cis_line	trans_line	cis_tester	trans_tester	mag_cis
10	r101	M	F10005_SI	0	0.475	1274	155	133	1562	29.523922	353.733333	28.666667	-448.133333	6.666667	-6.666667	28.666667
11	r280	M	F10005_SI	0	0.545	1208	126	143	1477	27.917307	353.733333	-10.333333	-428.133333	6.666667	-6.666667	10.333333
12	r315	M	F10005_SI	0	0.490	1231	237	218	1686	31.867690	353.733333	25.666667	-278.133333	6.666667	-6.666667	25.666667
13	r324	M	F10005_SI	1	0.450	2554	349	276	3179	60.087419	353.733333	79.666667	-162.133333	6.666667	-6.666667	79.666667
14	r335	M	F10005_SI	0	0.514	1339	215	217	1771	33.474306	353.733333	4.666667	-280.133333	6.666667	-6.666667	4.666667
15	r340	M	F10005_SI	1	0.241	1897	332	102	2331	44.059067	353.733333	236.666667	-510.133333	6.666667	-6.666667	236.666667
16	r357	M	F10005_SI	1	0.542	2797	358	408	3563	67.345540	353.733333	-43.333333	101.866667	6.666667	-6.666667	43.333333
17	r358	M	F10005_SI	0	0.480	1494	185	162	1841	34.797401	353.733333	29.666667	-390.133333	6.666667	-6.666667	29.666667
18	r365	M	F10005_SI	0	0.508	2313	385	386	3084	58.291790	353.733333	5.666667	57.866667	6.666667	-6.666667	5.666667
19	r373	M	F10005_SI	0	0.450	1546	146	113	1805	34.116952	353.733333	39.666667	-488.133333	6.666667	-6.666667	39.666667
20	r374	M	F10005_SI	0	0.412	1703	231	155	2089	39.484938	353.733333	82.666667	-404.133333	6.666667	-6.666667	82.666667
21	r380	M	F10005_SI	0	0.472	1584	295	254	2133	40.316598	353.733333	47.666667	-206.133333	6.666667	-6.666667	47.666667
22	r427	M	F10005_SI	0	0.501	4246	522	510	5278	99.761370	353.733333	18.666667	305.866667	6.666667	-6.666667	18.666667
23	r491	M	F10005_SI	0	0.484	3827	657	599	5083	96.075605	353.733333	64.666667	483.866667	6.666667	-6.666667	64.666667
24	r517	M	F10005_SI	0	0.480	2591	580	520	3691	69.764914	353.733333	66.666667	325.866667	6.666667	-6.666667	66.666667
25	r732	M	F10005_SI	0	0.471	1281	276	237	1794	33.909037	353.733333	45.666667	-240.133333	6.666667	-6.666667	45.666667
26	r737	M	F10005_SI	0	0.566	1576	126	156	1858	35.118724	353.733333	-23.333333	-402.133333	6.666667	-6.666667	23.333333
27	r799	M	F10005_SI	0	0.487	2798	520	480	3798	71.787360	353.733333	46.666667	245.866667	6.666667	-6.666667	46.666667
28	r820	M	F10005_SI	0	0.442	1880	236	180	2296	43.397519	353.733333	62.666667	-354.133333	6.666667	-6.666667	62.666667
29	r85	M	F10005_SI	0	0.606	1086	131	191	1408	26.613113	353.733333	-53.333333	-332.133333	6.666667	-6.666667	53.333333
30	w114	M	F10005_SI	0	0.524	1042	230	243	1515	28.635558	353.733333	-6.333333	-228.133333	6.666667	-6.666667	6.333333
31	w38	M	F10005_SI	1	0.537	2046	444	501	2991	56.533963	353.733333	-50.333333	287.866667	6.666667	-6.666667	50.333333
32	w47	M	F10005_SI	0	0.504	5492	1425	1417	8334	157.523922	353.733333	14.666667	2119.866667	6.666667	-6.666667	14.666667
33	w52	M	F10005_SI	0	0.501	3848	407	397	4652	87.929120	353.733333	16.666667	79.866667	6.666667	-6.666667	16.666667
34	w55	M	F10005_SI	1	0.784	4191	252	882	5325	100.649734	353.733333	-623.333333	1049.866667	6.666667	-6.666667	623.333333
35	w59	M	F10005_SI	0	0.481	1429	367	329	2125	40.165387	353.733333	44.666667	-56.133333	6.666667	-6.666667	44.666667
36	w64	M	F10005_SI	0	0.467	2153	284	239	2676	50.580035	353.733333	51.666667	-236.133333	6.666667	-6.666667	51.666667
37	w68	M	F10005_SI	0	0.460	1915	371	305	2591	48.973420	353.733333	72.666667	-104.133333	6.666667	-6.666667	72.666667
38	w76	M	F10005_SI	0	0.578	1757	286	376	2419	45.722386	353.733333	-83.333333	37.866667	6.666667	-6.666667	83.333333
39	w79	M	F10005_SI	0	0.611	2596	384	583	3563	67.345540	353.733333	-192.333333	451.866667	6.666667	-6.666667	192.333333
65860	r101	V	F10005_SI	1	0.461	4444	454	377	5275	99.704666	370.816667	74.433333	14.933333	-2.566667	2.566667	74.433333
65861	r280	V	F10005_SI	0	0.481	1472	236	210	1918	36.252806	370.816667	23.433333	-319.066667	-2.566667	2.566667	23.433333
65862	r315	V	F10005_SI	1	0.436	2754	674	508	3936	74.395747	370.816667	163.433333	276.933333	-2.566667	2.566667	163.433333
65863	r324	V	F10005_SI	0	0.441	2473	331	252	3056	57.762552	370.816667	76.433333	-235.066667	-2.566667	2.566667	76.433333
65864	r335	V	F10005_SI	0	0.498	1359	185	175	1719	32.491435	370.816667	7.433333	-389.066667	-2.566667	2.566667	7.433333
65865	r340	V	F10005_SI	0	0.511	1661	166	165	1992	37.651506	370.816667	-1.566667	-409.066667	-2.566667	2.566667	1.566667
65866	r357	V	F10005_SI	0	0.520	2373	322	335	3030	57.271116	370.816667	-15.566667	-69.066667	-2.566667	2.566667	15.566667
65867	r358	V	F10005_SI	0	0.526	2075	238	252	2565	48.481985	370.816667	-16.566667	-235.066667	-2.566667	2.566667	16.566667
65868	r365	V	F10005_SI	0	0.500	3119	489	474	4082	77.155346	370.816667	12.433333	208.933333	-2.566667	2.566667	12.433333
65869	r373	V	F10005_SI	0	0.426	3498	371	266	4135	78.157118	370.816667	102.433333	-207.066667	-2.566667	2.566667	102.433333
65870	r374	V	F10005_SI	0	0.527	2930	401	433	3764	71.144714	370.816667	-34.566667	126.933333	-2.566667	2.566667	34.566667
65871	r380	V	F10005_SI	0	0.477	1724	371	327	2422	45.779090	370.816667	41.433333	-85.066667	-2.566667	2.566667	41.433333
65872	r427	V	F10005_SI	0	0.479	3023	367	326	3716	70.237448	370.816667	38.433333	-87.066667	-2.566667	2.566667	38.433333
65873	r491	V	F10005_SI	0	0.517	2550	448	465	3463	65.455405	370.816667	-19.566667	190.933333	-2.566667	2.566667	19.566667
65874	r517	V	F10005_SI	0	0.425	1393	363	258	2014	38.067336	370.816667	102.433333	-223.066667	-2.566667	2.566667	102.433333
65875	r732	V	F10005_SI	0	0.532	1126	186	202	1514	28.616657	370.816667	-18.566667	-335.066667	-2.566667	2.566667	18.566667
65876	r737	V	F10005_SI	0	0.457	1353	148	118	1619	30.601299	370.816667	27.433333	-503.066667	-2.566667	2.566667	27.433333
65877	r799	V	F10005_SI	0	0.488	1609	272	250	2131	40.278795	370.816667	19.433333	-239.066667	-2.566667	2.566667	19.433333
65878	r820	V	F10005_SI	0	0.506	2337	248	245	2830	53.490845	370.816667	0.433333	-249.066667	-2.566667	2.566667	0.433333
65879	r85	V	F10005_SI	0	0.562	1859	234	289	2382	45.023036	370.816667	-57.566667	-161.066667	-2.566667	2.566667	57.566667
65880	w114	V	F10005_SI	0	0.397	1095	472	300	1867	35.288836	370.816667	169.433333	-139.066667	-2.566667	2.566667	169.433333
65881	w38	V	F10005_SI	0	0.514	3793	704	727	5224	98.740697	370.816667	-25.566667	714.933333	-2.566667	2.566667	25.566667
65882	w47	V	F10005_SI	0	0.495	2971	772	737	4480	84.678086	370.816667	32.433333	734.933333	-2.566667	2.566667	32.433333
65883	w52	V	F10005_SI	0	0.495	2136	225	212	2573	48.633196	370.816667	10.433333	-315.066667	-2.566667	2.566667	10.433333
65884	w55	V	F10005_SI	1	0.773	3809	228	750	4787	90.480803	370.816667	-524.566667	760.933333	-2.566667	2.566667	524.566667
65885	w59	V	F10005_SI	0	0.567	1531	329	417	2277	43.038393	370.816667	-90.566667	94.933333	-2.566667	2.566667	90.566667
65886	w64	V	F10005_SI	0	0.486	3494	423	387	4304	81.351447	370.816667	33.433333	34.933333	-2.566667	2.566667	33.433333
65887	w68	V	F10005_SI	1	0.438	2826	639	487	3952	74.698169	370.816667	149.433333	234.933333	-2.566667	2.566667	149.433333
65888	w76	V	F10005_SI	0	0.514	2561	475	488	3524	66.608387	370.816667	-15.566667	236.933333	-2.566667	2.566667	15.566667
65889	w79	V	F10005_SI	1	0.633	2754	392	654	3800	71.825162	370.816667	-264.566667	568.933333	-2.566667	2.566667	264.566667