In [1]:
# this first cell imports the libraries we typically use for data science in Python
import pandas as pd
import numpy as np
# this is the WormBase Enrichment Suite module (previously just TEA)
import tissue_enrichment_analysis as ea
# plotting libraries
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline
# This enables SVG graphics inline.
%config InlineBackend.figure_formats = {'png', 'retina'}
In [2]:
# load your DE genes (in WBID format) to a pandas dataframe or to a list
df = pd.read_csv('EVN_wbids.csv')
In [3]:
# fetch the dictionaries using the fetch_dictionary function:
tissue = ea.fetch_dictionary('tissue')
phenotype = ea.fetch_dictionary('phenotype')
go = ea.fetch_dictionary('go')
In [4]:
# place the dictionaries into a hash
frames = {'tissue': tissue, 'phenotype': phenotype, 'go': go}
# test the list of genes against each dictionary and store the
# results in a hash called results
# NOTE: The enrichment_analysis function only returns Stat. Sig. Results!
result = {}
for analysis, dictionary in frames.items():
result[analysis] = ea.enrichment_analysis(df.gene_name, dictionary, show=False, alpha=10**-1)
In [5]:
# make the figure in the paper:
fig, ax = plt.subplots(nrows=3, figsize=(8, 10))
i= 0
# go through the results hash:
for t, r in result.items():
# calculate the negative log of the Q-values and store
r['logQ'] = -r['Q value'].apply(np.log10)
# remove np.infinites with np.nan
r.logQ.replace([np.inf], np.nan, inplace=True)
# remove np.nan with 70 (after 10**-64, the hypergeometric function crashes and returns 0)
r.logQ.replace(np.nan, 70, inplace=True)
# call the plotting function in the Enrichment Suite to plot the results
ea.plot_enrichment_results(r, title=t, analysis=t, ax=ax[i], y='logQ', n_bars=10)
# prettify axes
ax[i].set_ylabel(t)
if i != 2:
ax[i].set_xlabel('')
else:
ax[i].set_xlabel(r'$-\log_{10}{Q}$')
i += 1
# save figure
plt.savefig('Enrichment_Results.svg', bbox_inches='tight')