Figure. Functional Annotation



In [1]:

    
import cPickle
import glob
import os
import random
import subprocess

import cdpybio as cpb
from ipyparallel import Client
import matplotlib as mpl
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import networkx as nx
import numpy as np
import pandas as pd
import pybedtools as pbt
from scipy.stats import fisher_exact
import seaborn as sns
import statsmodels.api as sm
import statsmodels.formula.api as smf
import statsmodels as sms
import tabix
import vcf as pyvcf
import weblogolib as logo

import cardipspy as cpy
import ciepy

%matplotlib inline
%load_ext rpy2.ipython

dy_name = 'figure_functional_annotation'

import socket
if socket.gethostname() == 'fl-hn1' or socket.gethostname() == 'fl-hn2':
    dy = os.path.join(ciepy.root, 'sandbox', 'tmp', dy_name)
    cpy.makedir(dy)
    pbt.set_tempdir(dy)
    
outdir = os.path.join(ciepy.root, 'output', dy_name)
cpy.makedir(outdir)

private_outdir = os.path.join(ciepy.root, 'private_output', dy_name)
cpy.makedir(private_outdir)



In [2]:

    
sns.set_style('whitegrid')



In [3]:

    
fn = os.path.join(ciepy.root, 'output', 'functional_annotation_analysis', 'encode_dnase_res.tsv')
encode_dnase_res = pd.read_table(fn, index_col=0)

fn = os.path.join(ciepy.root, 'output', 'functional_annotation_analysis', 'roadmap_dnase_res.tsv')
roadmap_dnase_res = pd.read_table(fn, index_col=0)

fn = os.path.join(os.path.split(cpy.roadmap_15_state_annotation)[0], 'epigenome_metadata.tsv')
roadmap_meta = pd.read_table(fn, index_col=0)

fn = os.path.join(os.path.split(cpy.roadmap_15_state_annotation)[0], 'EIDlegend.txt')
roadmap_ids = pd.read_table(fn, squeeze=True, index_col=0, header=None)

fn = os.path.join(ciepy.root, 'output', 'functional_annotation_analysis', 'encode_stem_cell_chip_seq_res.tsv')
tf_res = pd.read_table(fn, index_col=0)

Process data

I'm going to subset the data a bit for plotting and print out some stats for the paper. I also need to rename the cell lines and categories to have some consistency across the two projects.

DHS results

Roadmap



In [4]:

    
roadmap_dnase_res = roadmap_dnase_res.join(roadmap_meta[['GROUP', 'TYPE', 'ANATOMY']])
roadmap_dnase_res['name'] = roadmap_ids[roadmap_dnase_res.index].values



In [5]:

    
print('Tested {} Roadmap cell types.'.format(roadmap_dnase_res.shape[0]))









    



Tested 53 Roadmap cell types.



In [6]:

    
roadmap_meta.TYPE.value_counts()









    Out[6]:





PrimaryTissue     50
PrimaryCulture    38
PrimaryCell       25
ESCDerived         9
CellLine           5
Name: TYPE, dtype: int64

I can convert some of these labels directly, but some I have to break down. In particular, there are a lot of things categorized as 'PrimaryCulture'.



In [7]:

    
sample_type_conv = {
    'PrimaryTissue':'Primary tissue',
    'PrimaryCulture':'Primary culture',
    'PrimaryCell':'Primary cell',
    'ESCDerived':'In vitro differentiated',
    'CellLine':'Immortalized line',
}



In [8]:

    
roadmap_dnase_res['my_cell_type'] = [sample_type_conv[x] for x in roadmap_dnase_res.TYPE]

I want to specifically label the stem and fetal cells from Roadmap.



In [9]:

    
roadmap_dnase_res.ix[roadmap_dnase_res.ANATOMY == 'IPSC', 'my_cell_type'] = 'Stem cell'
roadmap_dnase_res.ix[roadmap_dnase_res.ANATOMY == 'ESC', 'my_cell_type'] = 'Stem cell'
roadmap_dnase_res.ix[(roadmap_dnase_res.name.apply(lambda x: 'fetal' in x.lower())) & 
                     (roadmap_dnase_res.TYPE == 'PrimaryTissue'), 'my_cell_type'] = 'Primary fetal tissue'

Now I'll choose the top 25 cell types and rename them with more reasonable and consistent names. For instance, ENCODE has both H1-hESC and H9 while Roadmap says "H1 Cell Line". These should be standardized. I also need to standardize capitalization and weird stuff like "skin02" in the Roadmap names.



In [10]:

    
roadmap_dnase_res_f = roadmap_dnase_res.sort_values(by='pvalue', ascending=False).tail(25)

name_conv = {
    'Primary hematopoietic stem cells G-CSF-mobilized Male':'Hematopoietic stem cells (G-CSF mobilized)',
    'Primary monocytes fromperipheralblood':'Peripheral blood monocytes',
    'Foreskin Fibroblast Primary Cells skin02':'Foreskin fibroblasts',
    'Fetal Adrenal Gland':'Fetal adrenal gland',
    'A549 EtOH 0.02pct Lung Carcinoma Cell Line':'A549 lung carcinoma line',
    'HSMM Skeletal Muscle Myoblasts Cell Line':'HSMM skeletal muscle myoblasts line',
    'Fetal Intestine Large':'Fetal large intestine',
    'NHLF Lung Fibroblast Primary Cells':'NHLF lung fibroblasts',
    'NH-A Astrocytes Cell Line':'NH-A astrocytes',
    'HUVEC Umbilical Vein Endothelial Cells Cell Line':'HUVEC umbilical vein endothelial cells',
    'Fetal Stomach':'Fetal stomach',
    'Primary hematopoietic stem cells G-CSF-mobilized Female':'Hematopoietic stem cells (G-CSF mobilized)',
    'Fetal Kidney':'Fetal kidney',
    'Breast variant Human Mammary Epithelial Cells (vHMEC)':'vHMEC breast variant mammary epithelial cells',
    'Fetal Lung':'Fetal lung',
    'Fetal Intestine Small':'Fetal small intestine',
    'Ovary':'Ovary',
    'Fetal Heart':'Fetal heart',
    'HepG2 Hepatocellular Carcinoma Cell Line':'HepG2 hepatocellular carcinoma line',
    'Pancreas':'Pancreas',
    'H1 Derived Neuronal Progenitor Cultured Cells':'H1 derived neuronal progenitors',
    'H1 BMP4 Derived Mesendoderm Cultured Cells':'H1 BMP4 derived mesendoderm',
    'iPS DF 6.9 Cell Line':'iPSC (DF 6.9)',
    'iPS DF 19.11 Cell Line':'iPSC (DF 19.11)',
    'H9 Cell Line':'H9 hESC',
    'H1 Cell Line':'H1 hESC',
    'HSMM cell derived Skeletal Muscle Myotubes Cell Line':'HSMM-derived skeletal muscle myotubes line',
    'Fetal Brain Male':'Fetal brain',
    'Fetal Thymus':'Fetal thymus',
    'H1 BMP4 Derived Trophoblast Cultured Cells':'H1 BMP4 derived trophoblasts',
    'Foreskin Keratinocyte Primary Cells skin02':'Foreskin keratinocytes',
    'Small Intestine':'Small intestine'
}

roadmap_dnase_res_f['my_name'] = [name_conv[x] for x in roadmap_dnase_res_f.name]



In [11]:

    
roadmap_dnase_res['my_name'] = np.nan
roadmap_dnase_res.ix[roadmap_dnase_res_f.index, 'my_name'] = roadmap_dnase_res_f.my_name

ENCODE



In [12]:

    
a = encode_dnase_res.shape[0]
b = encode_dnase_res.cell_type.value_counts().shape[0]
print('Tested {} samples from {} cell types.'.format(a, b))
print('Tested {} skin fibroblast samples.'.format(sum(encode_dnase_res.cell_type == 'skin fibroblast')))









    



Tested 209 samples from 134 cell types.
Tested 9 skin fibroblast samples.



In [13]:

    
encode_dnase_res.sort_values(by='pvalue', inplace=True)
pos = ', '.join([str(x + 1) for x in np.where(encode_dnase_res.cell_type == 'skin fibroblast')[0]])
print('The skin fibroblasts were in positions {} in the list of '
      'most enriched lines.'.format(pos))









    



The skin fibroblasts were in positions 19, 20, 24, 43, 47, 59, 155, 188, 206 in the list of most enriched lines.



In [14]:

    
encode_dnase_res.biosample_type.value_counts()









    Out[14]:





primary cell                          97
immortalized cell line                86
in vitro differentiated cells          9
tissue                                 8
stem cell                              5
induced pluripotent stem cell line     4
Name: biosample_type, dtype: int64



In [15]:

    
sample_type_conv = {
    'primary cell':'Primary cell',
    'immortalized cell line':'Immortalized line',
    'in vitro differentiated cells':'In vitro differentiated',
    'tissue':'Primary tissue',
    'stem cell':'Stem cell',
    'induced pluripotent stem cell line':'Stem cell',
}



In [16]:

    
encode_dnase_res['my_cell_type'] = [sample_type_conv[x] for x in encode_dnase_res.biosample_type]

Now I'll choose the top 25 cell types and rename them with more reasonable and consistent names. This site has info on the ENCODE cell lines.



In [17]:

    
encode_dnase_res_f = encode_dnase_res.sort_values(by='pvalue', ascending=False).tail(25)

name_conv = {
    'H7-hESC':'H7 hESC',
    'H1-hESC':'H1 hESC',
    'cardiac fibroblast':'Cardiac fibroblasts',
    'MCF-7':'MCF-7 breast cancer line',
    'cardiac mesoderm':'Cardiac mesoderm',
    'skin fibroblast':'Skin fibroblasts',
    'dermis blood vessel endothelial cell':'Dermis blood vessel endothelium',
    'RWPE1':'RWPE1 prostate prostate epithelial line',
    'GM12865':'GM12865 lymphoblastoid line',
    'HCT116':'HCT116 colorectal carcinoma line',
    'HTR-8/SVneo':'HTR-8/SVneo trophoblast line',
    'myoblast':'Myoblasts',
    'urothelium cell line':'Urothelium line',
    'olfactory neurosphere cell line':'Olfactory neurosphere line',
    'induced pluripotent stem cell':'iPSC',
    'H9':'H9 hESC',
    'NT2/D1':'NT2/D1 pluripotent embryonal carcinoma line',
    'K562':'K562',
    'Ishikawa':'Ishikawa',
    'HEK293':'HEK293',
    'SK-N-SH':'SK-N-SH',
    'HeLa-S3':'HeLa-S3',
    'fibroblast of the conjunctiva':'Conjunctiva fibroblast',
    'BE2C':'BE2C',
    'fibroblast of lung':'Lung fibroblast',
    'HuH-7':'HuH-7'
}

encode_dnase_res_f['my_name'] = [name_conv[x] for x in encode_dnase_res_f.cell_type]



In [18]:

    
encode_dnase_res['my_name'] = np.nan
encode_dnase_res.ix[encode_dnase_res_f.index, 'my_name'] = encode_dnase_res_f.my_name

TF results

I'll drop duplicate TF results (keeping the most significant). I also want to remove experiments that aren't for TFs.



In [19]:

    
tf_res = tf_res.sort_values(by='pvalue').drop_duplicates(subset=['target'])
tf_res.target_type.value_counts()









    Out[19]:





transcription factor                                                                                39
chromatin remodeller, transcription factor                                                           8
RNA binding protein, transcription factor                                                            3
broad histone mark, histone, histone modification                                                    2
chromatin remodeller, other context, other post-translational modification, transcription factor     1
histone, histone modification, narrow histone mark                                                   1
Name: target_type, dtype: int64

I'll keep all of the categories that have "transcription factor."



In [20]:

    
tf_res = tf_res[(tf_res.target_type != 'broad histone mark, histone, histone modification') & 
                (tf_res.target_type != 'histone, histone modification, narrow histone mark')]

I'll also remove Pol2.



In [21]:

    
tf_res = tf_res.drop(tf_res[tf_res.target.apply(lambda x: 'POLR2A' in x)].index)



In [22]:

    
print('We have {} TF experiments.'.format(tf_res.shape[0]))
n = np.where(tf_res.target == 'NANOG')[0][0]
print('NANOG is #{} in the list of most enriched TF.'.format(n + 1))
n = np.where(tf_res.target == 'POU5F1')[0][0]
print('POU5F1 is #{} in the list of most enriched TF.'.format(n + 1))









    



We have 49 TF experiments.
NANOG is #1 in the list of most enriched TF.
POU5F1 is #3 in the list of most enriched TF.



In [23]:

    
tf_res = tf_res.sort_values(by='odds_ratio', ascending=False)

Paper



In [24]:

    
sns.palplot(sns.color_palette('Set2', 7))



In [25]:

    
cdict = dict(zip(set(roadmap_dnase_res.my_cell_type) | set(encode_dnase_res.my_cell_type), 
                 sns.color_palette('Set2', 7)))



In [36]:

    
fig = plt.figure(figsize=(6.85, 6.1), dpi=300)

gs = gridspec.GridSpec(1, 1)
ax = fig.add_subplot(gs[0, 0])
ax.text(0, 1, 'Figure 2',
        size=16, va='top')
ciepy.clean_axis(ax)
ax.set_xticks([])
ax.set_yticks([])
gs.tight_layout(fig, rect=[0, 0.95, 0.5, 1])

# Roadmap DNase
gs = gridspec.GridSpec(1, 1)
ax = fig.add_subplot(gs[0, 0])

c = [cdict[x] for x in roadmap_dnase_res_f.my_cell_type]
(-np.log10(roadmap_dnase_res_f.pvalue)).plot(kind='barh', ax=ax, color=c, lw=0)
ax.set_xlabel('$-\log_{10}$ $p$-value', fontsize=6)
ax.set_ylabel('')
ax.grid(axis='y')
ax.set_yticklabels(roadmap_dnase_res_f.my_name, fontsize=6)

for t in ax.get_xticklabels() + ax.get_yticklabels():
    t.set_fontsize(6)
gs.tight_layout(fig, rect=[0, 0.45, 0.51, 0.94])

# ENCODE DNase
gs = gridspec.GridSpec(1, 1)
ax = fig.add_subplot(gs[0, 0])

c = [cdict[x] for x in encode_dnase_res_f.my_cell_type]
ax = (-np.log10(encode_dnase_res_f.pvalue)).plot(kind='barh', ax=ax, color=c, label=None, lw=0)
ax.set_xlabel('$-\log_{10}$ $p$-value', fontsize=6)
ax.set_ylabel('')
ax.grid(axis='y')
ax.set_yticklabels(encode_dnase_res_f.my_name, fontsize=6)
for t in ax.get_xticklabels() + ax.get_yticklabels():
    t.set_fontsize(6)
gs.tight_layout(fig, rect=[0.49, 0.45, 1, 0.95])

# Color legend
gs = gridspec.GridSpec(1, 1)
ax = fig.add_subplot(gs[0, 0])
ciepy.clean_axis(ax)
rects = []
labels = []
for k in cdict.keys():
    if k == 'induced pluripotent stem cell line':
        labels.append('iPSC')
    elif k == 'in vitro differentiated cells':
        labels.append('in vitro\ndifferentiated\ncell')
    elif k == 'immortalized cell line':
        labels.append('immortalized\ncell line')
    else:
        labels.append(k)
    r = plt.Rectangle((0, 0), 0, 0, fc=cdict[k])
    rects.append(r)
lgd = ax.legend(rects, labels, loc='upper center', fontsize=6, ncol=7)
for p in lgd.get_patches():
    p.set_linewidth(0)
    
gs.tight_layout(fig, rect=[0, 0.45, 1, 0.5])

# ENCODE ChIP-seq
gs = gridspec.GridSpec(1, 1)
ax = fig.add_subplot(gs[0, 0])

colord = {True:sns.color_palette()[0], False:sns.color_palette()[1]}
tf_res.odds_ratio.plot(ax=ax, kind='bar', lw=0, color=[colord[x] for x in tf_res.pvalue < 0.05])
ax.set_ylabel('Odds ratio', fontsize=8)
ax.set_xlabel('')
ax.grid(axis='x')
ax.set_xticklabels(tf_res.target, fontsize=8)
for t in ax.get_yticklabels():
    t.set_fontsize(8)
    
# Color legend
rects = []
labels = ['$p$ < 0.05', '$p$ > 0.05']
r = plt.Rectangle((0, 0), 0, 0, fc=colord[True])
rects.append(r)
r = plt.Rectangle((0, 0), 0, 0, fc=colord[False])
rects.append(r)
lgd = ax.legend(rects, labels, loc='upper right', fontsize=8, frameon=True, fancybox=True)
for p in lgd.get_patches():
    p.set_linewidth(0)
gs.tight_layout(fig, rect=[0, 0, 1, 0.45])

t = fig.text(0.005, 0.92, 'A', weight='bold', 
             size=12)
t = fig.text(0.5, 0.92, 'B', weight='bold', 
             size=12)
t = fig.text(0.005, 0.42, 'C', weight='bold', 
             size=12)

fig.savefig(os.path.join(outdir, 'functional_annotation.png'), dpi=300)



In [27]:

    
roadmap_dnase_res.pvalue.max()









    Out[27]:





0.161173589054



In [28]:

    
roadmap_dnase_res_f.pvalue.max()









    Out[28]:





0.00035665251854699999



In [29]:

    
# I'll write these files so I can make them into a table for the paper.
roadmap_dnase_res.sort_values(by='pvalue').to_csv(os.path.join(outdir, 'roadmap_res.tsv'), sep='\t')
encode_dnase_res.sort_values(by='pvalue').to_csv(os.path.join(outdir, 'encode_res.tsv'), sep='\t')
tf_res.sort_values(by='odds_ratio', ascending=False).to_csv(os.path.join(outdir, 'tf_res.tsv'), sep='\t')



In [44]:

    
(.05) * 6.1 / 5.8









    Out[44]:





0.052586206896551725



In [53]:

    
fig = plt.figure(figsize=(6.85, 5.8), dpi=300)

# Roadmap DNase
gs = gridspec.GridSpec(1, 1)
ax = fig.add_subplot(gs[0, 0])

c = [cdict[x] for x in roadmap_dnase_res_f.my_cell_type]
(-np.log10(roadmap_dnase_res_f.pvalue)).plot(kind='barh', ax=ax, color=c, lw=0)
ax.set_xlabel('$-\log_{10}$ $p$-value', fontsize=6)
ax.set_ylabel('')
ax.grid(axis='y')
ax.set_yticklabels(roadmap_dnase_res_f.my_name, fontsize=6)

for t in ax.get_xticklabels() + ax.get_yticklabels():
    t.set_fontsize(6)
gs.tight_layout(fig, rect=[0, 0.47, 0.51, 1])

# ENCODE DNase
gs = gridspec.GridSpec(1, 1)
ax = fig.add_subplot(gs[0, 0])

c = [cdict[x] for x in encode_dnase_res_f.my_cell_type]
ax = (-np.log10(encode_dnase_res_f.pvalue)).plot(kind='barh', ax=ax, color=c, label=None, lw=0)
ax.set_xlabel('$-\log_{10}$ $p$-value', fontsize=6)
ax.set_ylabel('')
ax.grid(axis='y')
ax.set_yticklabels(encode_dnase_res_f.my_name, fontsize=6)
for t in ax.get_xticklabels() + ax.get_yticklabels():
    t.set_fontsize(6)
gs.tight_layout(fig, rect=[0.49, 0.47, 1, 1])

# Color legend
gs = gridspec.GridSpec(1, 1)
ax = fig.add_subplot(gs[0, 0])
ciepy.clean_axis(ax)
rects = []
labels = []
for k in cdict.keys():
    if k == 'induced pluripotent stem cell line':
        labels.append('iPSC')
    elif k == 'in vitro differentiated cells':
        labels.append('in vitro\ndifferentiated\ncell')
    elif k == 'immortalized cell line':
        labels.append('immortalized\ncell line')
    else:
        labels.append(k)
    r = plt.Rectangle((0, 0), 0, 0, fc=cdict[k])
    rects.append(r)
lgd = ax.legend(rects, labels, loc='upper center', fontsize=6, ncol=7)
for p in lgd.get_patches():
    p.set_linewidth(0)
    
gs.tight_layout(fig, rect=[0, 0.46, 1, 0.52])

# ENCODE ChIP-seq
gs = gridspec.GridSpec(1, 1)
ax = fig.add_subplot(gs[0, 0])

colord = {True:sns.color_palette()[0], False:sns.color_palette()[1]}
tf_res.odds_ratio.plot(ax=ax, kind='bar', lw=0, color=[colord[x] for x in tf_res.pvalue < 0.05])
ax.set_ylabel('Odds ratio', fontsize=8)
ax.set_xlabel('')
ax.grid(axis='x')
ax.set_xticklabels(tf_res.target, fontsize=8)
for t in ax.get_yticklabels():
    t.set_fontsize(8)
    
# Color legend
rects = []
labels = ['$p$ < 0.05', '$p$ > 0.05']
r = plt.Rectangle((0, 0), 0, 0, fc=colord[True])
rects.append(r)
r = plt.Rectangle((0, 0), 0, 0, fc=colord[False])
rects.append(r)
lgd = ax.legend(rects, labels, loc='upper right', fontsize=8, frameon=True, fancybox=True)
for p in lgd.get_patches():
    p.set_linewidth(0)
gs.tight_layout(fig, rect=[0, 0, 1, 0.46])

t = fig.text(0.005, 0.97, 'A', weight='bold', 
             size=12)
t = fig.text(0.5, 0.97, 'B', weight='bold', 
             size=12)
t = fig.text(0.005, 0.43, 'C', weight='bold', 
             size=12)

fig.savefig(os.path.join(outdir, 'functional_annotation.pdf'), dpi=300)



In [ ]:



In [ ]:



In [30]:

    
3 +









    



  File "<ipython-input-30-4715fb1d5390>", line 1
    3 +
        ^
SyntaxError: invalid syntax

Presentation



In [ ]:

    
fig = plt.figure(figsize=(10, 6), dpi=300)

fs = 10

# Roadmap DNase
gs = gridspec.GridSpec(1, 1)
ax = fig.add_subplot(gs[0, 0])

c = [cdict[x] for x in roadmap_dnase_res_f.my_cell_type]
(-np.log10(roadmap_dnase_res_f.pvalue)).plot(kind='barh', ax=ax, color=c, lw=0)
ax.set_xlabel('$-\log_{10}$ $p$-value', fontsize=12)
ax.set_ylabel('')
ax.grid(axis='y')
ax.set_yticklabels(roadmap_dnase_res_f.my_name, fontsize=fs)

for t in ax.get_xticklabels() + ax.get_yticklabels():
    t.set_fontsize(fs)
gs.tight_layout(fig, rect=[0, 0.15, 0.51, 1])

# ENCODE DNase
gs = gridspec.GridSpec(1, 1)
ax = fig.add_subplot(gs[0, 0])

c = [cdict[x] for x in encode_dnase_res_f.my_cell_type]
ax = (-np.log10(encode_dnase_res_f.pvalue)).plot(kind='barh', ax=ax, color=c, label=None, lw=0)
ax.set_xlabel('$-\log_{10}$ $p$-value', fontsize=fs)
ax.set_ylabel('')
ax.grid(axis='y')
ax.set_yticklabels(encode_dnase_res_f.my_name, fontsize=fs)
for t in ax.get_xticklabels() + ax.get_yticklabels():
    t.set_fontsize(fs)
gs.tight_layout(fig, rect=[0.49, 0.15, 1, 1])

# Color legend
gs = gridspec.GridSpec(1, 1)
ax = fig.add_subplot(gs[0, 0])
ciepy.clean_axis(ax)
rects = []
labels = []
for k in cdict.keys():
    if k == 'induced pluripotent stem cell line':
        labels.append('iPSC')
    elif k == 'in vitro differentiated cells':
        labels.append('in vitro\ndifferentiated\ncell')
    elif k == 'immortalized cell line':
        labels.append('immortalized\ncell line')
    else:
        labels.append(k)
    r = plt.Rectangle((0, 0), 0, 0, fc=cdict[k])
    rects.append(r)
lgd = ax.legend(rects, labels, loc='upper center', fontsize=fs, ncol=4)
for p in lgd.get_patches():
    p.set_linewidth(0)
    
gs.tight_layout(fig, rect=[0, 0, 1, 0.15])

fig.savefig(os.path.join(outdir, 'dnase_presentation.pdf'))



In [ ]:

    
fig = plt.figure(figsize=(8, 5), dpi=300)

# ENCODE ChIP-seq
gs = gridspec.GridSpec(1, 1)
ax = fig.add_subplot(gs[0, 0])

colord = {True:sns.color_palette()[0], False:sns.color_palette()[1]}
tf_res.odds_ratio.plot(ax=ax, kind='bar', lw=0, color=[colord[x] for x in tf_res.pvalue < 0.05])
ax.set_ylabel('Odds ratio', fontsize=12)
ax.set_xlabel('')
ax.grid(axis='x')
ax.set_xticklabels(tf_res.target, fontsize=11)
for t in ax.get_yticklabels():
    t.set_fontsize(12)
    
# Color legend
rects = []
labels = ['$p$ < 0.05', '$p$ > 0.05']
r = plt.Rectangle((0, 0), 0, 0, fc=colord[True])
rects.append(r)
r = plt.Rectangle((0, 0), 0, 0, fc=colord[False])
rects.append(r)
lgd = ax.legend(rects, labels, loc='upper right', fontsize=12, frameon=True, fancybox=True)
for p in lgd.get_patches():
    p.set_linewidth(0)
gs.tight_layout(fig, rect=[0, 0, 1, 1])

fig.savefig(os.path.join(outdir, 'tf_presentation.pdf'))