In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import tissue_enrichment_analysis as tea
import pyrnaseq_graphics as rsq
import scipy.stats as stats
import matplotlib as mpl
from IPython.core.display import HTML
# bokeh
import bokeh.charts
import bokeh.charts.utils
import bokeh.io
import bokeh.models
import bokeh.palettes
import bokeh.plotting
from bokeh.plotting import figure
from bokeh.resources import CDN
from bokeh.embed import file_html
# Display graphics in this notebook
bokeh.io.output_notebook()
%matplotlib inline
# This enables high res graphics inline (only use with static plots (non-Bokeh))
# SVG is preferred, but there is a bug in Jupyter with vertical lines
%config InlineBackend.figure_formats = {'png', 'retina'}
# from IPython.display import set_matplotlib_formats
# set_matplotlib_formats('png')
rc = {'lines.linewidth': 2,
'axes.labelsize': 22,
'axes.titlesize': 20,
'legend.fontsize': 'x-large',
'axes.facecolor': 'DFDFE5'}
sns.set_context('paper', rc=rc)
sns.set_style('dark', rc=rc)
import matplotlib.cm as cm
from matplotlib import rc
rc('text', usetex=True)
import matplotlib.patheffects as path_effects
In [2]:
qval = .1 # qvalue from regression
In [3]:
output_aging = '../output/raw_aging_plots/'
output_genotype = '../output/raw_genotype_plots/'
output_interaction = '../output/raw_interaction_plots/'
output_sperm = '../output/raw_sperm_plots/'
# gene_lists from sleuth
# tpm vals for PCA
path = '../input/sleuth_results/'
# pos beta means high old adults
dfBetaA = pd.read_csv(path + "si2_aging_analysis.csv", comment='#')
dfBetaA.dropna(inplace=True)
# pos beta means high in fog2
dfBetaG = pd.read_csv(path + "si3_genotype_analysis.csv", comment='#')
dfBetaG.dropna(inplace=True)
# pos beta means high in fog2-aged
dfBetaAG = pd.read_csv(path + "si4_interaction_analysis.csv", comment='#')
dfBetaAG.dropna(inplace=True)
# likelihood ratio test results
dfLRT = pd.read_csv(path + "lrt.csv")
dfLRT.dropna(inplace=True)
# sort by target_id
dfBetaA.sort_values('target_id', inplace=True)
dfBetaA.reset_index(inplace=True)
dfBetaG.sort_values('target_id', inplace=True)
dfBetaG.reset_index(inplace=True)
dfBetaAG.sort_values('target_id', inplace=True)
dfBetaAG.reset_index(inplace=True)
dfLRT.sort_values('target_id', inplace=True)
dfLRT.reset_index(inplace=True)
tissue_df = tea.fetch_dictionary()
phenotype_df = pd.read_csv('../input/phenotype_ontology.csv')
go_df = pd.read_csv('../input/go_dictionary.csv')
# transcription factors:
tf = pd.read_csv('../input/tf_list.csv')
# some non-essential parameters
xticksize = 15.5
xlabelsize = 23
ylabelsize_volcano = 23
volcano_legendsize = 10
rc('text', usetex=True)
# plot size
ms = 4
In [4]:
def make_expression_axes(tooltips, title,
xlabel, ylabel):
"""A function to plot the bokeh single mutant comparisons."""
# Make the hover tool
hover = bokeh.models.HoverTool(tooltips=tooltips,
names=['circles'])
# Create figure
p = bokeh.plotting.figure(title=title, plot_width=650,
plot_height=450)
p.xgrid.grid_line_color = 'white'
p.ygrid.grid_line_color = 'white'
p.xaxis.axis_label = xlabel
p.yaxis.axis_label = ylabel
# Add the hover tool
p.add_tools(hover)
return p
def add_points(p, df1, x, y, se_x, color='blue', alpha=0.2, outline=False):
# Define colors in a dictionary to access them with
# the key from the pandas groupby funciton.
df = df1.copy()
transformed_q = -df[y].apply(np.log10)
df['transformed_q'] = transformed_q
source1 = bokeh.models.ColumnDataSource(df)
# Specify data source
p.circle(x=x, y='transformed_q', size=7,
alpha=alpha, source=source1,
color=color, name='circles')
if outline:
p.circle(x=x, y='transformed_q', size=7,
alpha=1,
source=source1, color='black',
fill_color=None, name='outlines')
# prettify
p.background_fill_color = "#DFDFE5"
p.background_fill_alpha = 0.5
return p
def selector(df):
"""A function to separate tfs from everything else"""
sig = (df.qval < 0.1)# & (dfBetaA.b.abs() > 0.5)
not_tf = (~df.target_id.isin(tf.target_id))
is_tf = (df.target_id.isin(tf.target_id))
to_plot_not = df[sig & not_tf]
to_plot_yes = df[sig & is_tf]
return to_plot_not, to_plot_yes
In [5]:
# What pops up on hover?
tooltips = [('ext_gene', '@ext_gene')]
p = make_expression_axes( tooltips, 'Aging Volcano Plot',
'Beta Coefficient (log-fold change)', '-log(Q)')
to_plot_not, to_plot_yes = selector(dfBetaA)
p = add_points(p, to_plot_not, 'b', 'qval', 'se_b', color='#1a9641')
p = add_points(p, to_plot_yes, 'b', 'qval', 'se_b', color='#fc8d59', alpha=0.6, outline=True)
html = file_html(p, CDN, "my plot")
HTML(html)
Out[5]:
In [6]:
# What pops up on hover?
tooltips = [('ext_gene', '@ext_gene')]
p = make_expression_axes( tooltips, 'Genotype Volcano Plot',
'Beta Coefficient (log-fold change)', '-log(Q)')
to_plot_not, to_plot_yes = selector(dfBetaG)
p = add_points(p, to_plot_not, 'b', 'qval', 'se_b', color='#1a9641')
p = add_points(p, to_plot_yes, 'b', 'qval', 'se_b', color='#fc8d59', alpha=0.6, outline=True)
html = file_html(p, CDN, "my plot")
HTML(html)
Out[6]:
In [7]:
# What pops up on hover?
tooltips = [('ext_gene', '@ext_gene')]
p = make_expression_axes( tooltips, 'Aging::Genotype Volcano Plot',
'Beta Coefficient (log-fold change)', '-log(Q)')
to_plot_not, to_plot_yes = selector(dfBetaAG)
p = add_points(p, to_plot_not, 'b', 'qval', 'se_b', color='#1a9641')
p = add_points(p, to_plot_yes, 'b', 'qval', 'se_b', color='#fc8d59', alpha=0.6, outline=True)
html = file_html(p, CDN, "my plot")
HTML(html)
Out[7]:
In [ ]:
z