A brief tutorial for the WormBase Enrichment Suite, Python interface

Loading the required libraries


In [1]:
# this first cell imports the libraries we typically use for data science in Python
import pandas as pd
import numpy as np

# this is the WormBase Enrichment Suite module (previously just TEA)
import tissue_enrichment_analysis as ea

# plotting libraries
import matplotlib.pyplot as plt
import seaborn as sns

%matplotlib inline

# This enables SVG graphics inline. 
%config InlineBackend.figure_formats = {'png', 'retina'}

Loading your gene list and fetching the dictionaries


In [2]:
# load your DE genes (in WBID format) to a pandas dataframe or to a list
df = pd.read_csv('EVN_wbids.csv')

In [3]:
# fetch the dictionaries using the fetch_dictionary function:
tissue = ea.fetch_dictionary('tissue')
phenotype = ea.fetch_dictionary('phenotype')
go = ea.fetch_dictionary('go')

Analyzing your gene list


In [4]:
# place the dictionaries into a hash
frames = {'tissue': tissue, 'phenotype': phenotype, 'go': go}

# test the list of genes against each dictionary and store the
# results in a hash called results
# NOTE: The enrichment_analysis function only returns Stat. Sig. Results!
result = {}
for analysis, dictionary in frames.items():
    result[analysis] = ea.enrichment_analysis(df.gene_name, dictionary, show=False, alpha=10**-1)

Plotting the results


In [5]:
# make the figure in the paper:
fig, ax = plt.subplots(nrows=3, figsize=(8, 10))
i= 0

# go through the results hash:
for t, r in result.items():
    # calculate the negative log of the Q-values and store
    r['logQ'] = -r['Q value'].apply(np.log10)
    # remove np.infinites with np.nan
    r.logQ.replace([np.inf], np.nan, inplace=True)
    # remove np.nan with 70 (after 10**-64, the hypergeometric function crashes and returns 0)
    r.logQ.replace(np.nan, 70, inplace=True)
    
    # call the plotting function in the Enrichment Suite to plot the results
    ea.plot_enrichment_results(r, title=t, analysis=t, ax=ax[i], y='logQ', n_bars=10)

    # prettify axes
    ax[i].set_ylabel(t)
    if i != 2:
        ax[i].set_xlabel('')
    else:
        ax[i].set_xlabel(r'$-\log_{10}{Q}$')
    i += 1

# save figure
plt.savefig('Enrichment_Results.svg', bbox_inches='tight')