Table of Contents


In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import tissue_enrichment_analysis as tea
import pyrnaseq_graphics as rsq
import scipy.stats as stats
import matplotlib as mpl

from IPython.core.display import HTML


# bokeh
import bokeh.charts
import bokeh.charts.utils
import bokeh.io
import bokeh.models
import bokeh.palettes
import bokeh.plotting
from bokeh.plotting import figure
from bokeh.resources import CDN
from bokeh.embed import file_html

# Display graphics in this notebook
bokeh.io.output_notebook()

%matplotlib inline

# This enables high res graphics inline (only use with static plots (non-Bokeh))
# SVG is preferred, but there is a bug in Jupyter with vertical lines
%config InlineBackend.figure_formats = {'png', 'retina'}

# from IPython.display import set_matplotlib_formats
# set_matplotlib_formats('png')
rc = {'lines.linewidth': 2, 
      'axes.labelsize': 22, 
      'axes.titlesize': 20, 
      'legend.fontsize': 'x-large',
      'axes.facecolor': 'DFDFE5'}
sns.set_context('paper', rc=rc)
sns.set_style('dark', rc=rc)

import matplotlib.cm as cm
from matplotlib import rc
rc('text', usetex=True)
import matplotlib.patheffects as path_effects


Loading BokehJS ...

In [2]:
qval = .1  # qvalue from regression

In [3]:
output_aging = '../output/raw_aging_plots/'
output_genotype = '../output/raw_genotype_plots/'
output_interaction = '../output/raw_interaction_plots/'
output_sperm = '../output/raw_sperm_plots/'

# gene_lists from sleuth
# tpm vals for PCA
path = '../input/sleuth_results/'
# pos beta means high old adults
dfBetaA = pd.read_csv(path + "si2_aging_analysis.csv", comment='#')
dfBetaA.dropna(inplace=True)
# pos beta means high in fog2
dfBetaG = pd.read_csv(path + "si3_genotype_analysis.csv", comment='#')
dfBetaG.dropna(inplace=True)
# pos beta means high in fog2-aged
dfBetaAG = pd.read_csv(path + "si4_interaction_analysis.csv", comment='#')
dfBetaAG.dropna(inplace=True)
# likelihood ratio test results
dfLRT = pd.read_csv(path + "lrt.csv")
dfLRT.dropna(inplace=True)

# sort by target_id
dfBetaA.sort_values('target_id', inplace=True)
dfBetaA.reset_index(inplace=True)
dfBetaG.sort_values('target_id', inplace=True)
dfBetaG.reset_index(inplace=True)
dfBetaAG.sort_values('target_id', inplace=True)
dfBetaAG.reset_index(inplace=True)
dfLRT.sort_values('target_id', inplace=True)
dfLRT.reset_index(inplace=True)

tissue_df = tea.fetch_dictionary()
phenotype_df = pd.read_csv('../input/phenotype_ontology.csv')
go_df = pd.read_csv('../input/go_dictionary.csv')

# transcription factors:
tf = pd.read_csv('../input/tf_list.csv')

# some non-essential parameters
xticksize = 15.5
xlabelsize = 23
ylabelsize_volcano = 23
volcano_legendsize = 10

rc('text', usetex=True)
# plot size
ms = 4

In [4]:
def make_expression_axes(tooltips, title,
                          xlabel, ylabel):
    """A function to plot the bokeh single mutant comparisons."""
    # Make the hover tool
    hover = bokeh.models.HoverTool(tooltips=tooltips,
                                   names=['circles'])

    # Create figure
    p = bokeh.plotting.figure(title=title, plot_width=650, 
                              plot_height=450)

    p.xgrid.grid_line_color = 'white'
    p.ygrid.grid_line_color = 'white'
    p.xaxis.axis_label = xlabel
    p.yaxis.axis_label = ylabel

    # Add the hover tool
    p.add_tools(hover)
    return p


def add_points(p, df1, x, y, se_x, color='blue', alpha=0.2, outline=False):
    # Define colors in a dictionary to access them with
    # the key from the pandas groupby funciton.
    df = df1.copy()
    transformed_q = -df[y].apply(np.log10)
    df['transformed_q'] = transformed_q

    source1 = bokeh.models.ColumnDataSource(df)

    # Specify data source
    p.circle(x=x, y='transformed_q', size=7,
             alpha=alpha, source=source1,
             color=color, name='circles')
    if outline:
        p.circle(x=x, y='transformed_q', size=7,
                 alpha=1,
                 source=source1, color='black',
                 fill_color=None, name='outlines')

    # prettify
    p.background_fill_color = "#DFDFE5"
    p.background_fill_alpha = 0.5
    
    return p

def selector(df):
    """A function to separate tfs from everything else"""
    sig = (df.qval < 0.1)# & (dfBetaA.b.abs() > 0.5)
    not_tf = (~df.target_id.isin(tf.target_id))
    is_tf = (df.target_id.isin(tf.target_id))
    to_plot_not = df[sig & not_tf]
    to_plot_yes = df[sig & is_tf]
    return to_plot_not, to_plot_yes

In [5]:
# What pops up on hover?
tooltips = [('ext_gene', '@ext_gene')]

p = make_expression_axes( tooltips, 'Aging Volcano Plot',
                         'Beta Coefficient (log-fold change)', '-log(Q)')

to_plot_not, to_plot_yes = selector(dfBetaA)

p = add_points(p, to_plot_not, 'b', 'qval', 'se_b', color='#1a9641')
p = add_points(p, to_plot_yes, 'b', 'qval', 'se_b', color='#fc8d59', alpha=0.6, outline=True)

html = file_html(p, CDN, "my plot")
HTML(html)


Out[5]:
my plot

In [6]:
# What pops up on hover?
tooltips = [('ext_gene', '@ext_gene')]

p = make_expression_axes( tooltips, 'Genotype Volcano Plot',
                         'Beta Coefficient (log-fold change)', '-log(Q)')

to_plot_not, to_plot_yes = selector(dfBetaG)

p = add_points(p, to_plot_not, 'b', 'qval', 'se_b', color='#1a9641')
p = add_points(p, to_plot_yes, 'b', 'qval', 'se_b', color='#fc8d59', alpha=0.6, outline=True)

html = file_html(p, CDN, "my plot")
HTML(html)


Out[6]:
my plot

In [7]:
# What pops up on hover?
tooltips = [('ext_gene', '@ext_gene')]

p = make_expression_axes( tooltips, 'Aging::Genotype Volcano Plot',
                         'Beta Coefficient (log-fold change)', '-log(Q)')

to_plot_not, to_plot_yes = selector(dfBetaAG)

p = add_points(p, to_plot_not, 'b', 'qval', 'se_b', color='#1a9641')
p = add_points(p, to_plot_yes, 'b', 'qval', 'se_b', color='#fc8d59', alpha=0.6, outline=True)

html = file_html(p, CDN, "my plot")
HTML(html)


Out[7]:
my plot

In [ ]:
z