In [1]:
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rcParams
import seaborn as sns
from sys import argv
from collections import OrderedDict
In [2]:
def plot_log2fc_along_genome(log2fc, cond1, cond2, label, gene_set):
    """Plot a log2 fold-change trace along the genome with highlighted gene sets.

    Draws on the *current* matplotlib figure/axes (pyplot state machine), so
    the caller creates the figure beforehand and saves/shows it afterwards.

    Reads these notebook-level globals at call time:
    ``df`` (gene IDs as index, a ``gene_position`` column and the ``log2fc``
    column), ``gene_dict``, ``color_dict``, ``name_lookup`` and
    ``xy_color_dict``.

    Parameters
    ----------
    log2fc : str
        Name of the ``df`` column that is plotted on the y axis.
    cond1, cond2 : str
        Condition codes; keys of ``name_lookup`` and ``xy_color_dict``.
        ``cond1`` annotates the positive half of the plot, ``cond2`` the
        negative half, and both appear in the y-axis label.
    label : bool
        If truthy, draw the x-axis label; otherwise leave the x axis
        unlabeled (e.g. for upper panels of a stacked figure).
    gene_set : iterable of str
        Keys of ``gene_dict`` / ``color_dict`` to highlight as scatter points.

    Returns
    -------
    None

    Raises
    ------
    KeyError
        If a pathway, condition code, column or gene ID is not found in the
        corresponding global lookup / DataFrame.
    """
    # Thin black trace through every gene, in genome order.
    plt.plot(df['gene_position'], df[log2fc], color='k', linewidth=1)

    # Overlay each requested gene set as coloured markers above the trace.
    for pathway in gene_set:
        genes = gene_dict[pathway]
        plt.scatter(df.loc[genes, 'gene_position'], df.loc[genes, log2fc],
                    s=80, edgecolor='black',
                    facecolor=sns.xkcd_rgb[color_dict[pathway]],
                    label=pathway, zorder=10)

    # Single-row legend stretched across the top edge of the axes.
    plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
               ncol=7, mode='expand', borderaxespad=0, fontsize=14,
               handletextpad=0.1)

    # Annotate which condition each half of the y axis corresponds to.
    plt.text(100, 7.5, 'Enriched in %s cells' % name_lookup[cond1], size=14,
             color=xy_color_dict[cond1], verticalalignment='center')
    plt.text(100, -7.5, 'Enriched in %s cells' % name_lookup[cond2], size=14,
             color=xy_color_dict[cond2], verticalalignment='center')

    # Axis labels, limits and ticks.
    # The x label is drawn only on request; previously a second, unconditional
    # plt.xlabel() call at the end of the function made `label` a no-op.
    if label:
        plt.xlabel('Position along Vibrio fischeri ES114 genome (gene 1 to 3693)')
    plt.ylabel('%s / %s, log2(fc)' % (cond1, cond2))
    plt.xlim([-100, len(df.index) + 100])  # 100-gene margin on both sides
    plt.ylim([-10, 10])
    plt.yticks([-9, -6, -3, 0, 3, 6, 9])
In [3]:
# Location of the RPKM results table (CSV), given relative to the notebook's
# working directory.
path = '../../data/results_rpkm.csv'
In [4]:
# Load the results table; the first CSV column becomes the index. Later cells
# look rows up by gene ID (e.g. 'VF_A1087') through this index.
df = pd.read_csv(path, index_col=0)
In [5]:
# Number the rows 0..N-1 in file order and store that as each gene's
# x coordinate for the genome plot.
# NOTE(review): assumes the CSV rows are already in genome order - confirm.
genepos = np.arange(df.shape[0])
df['gene_position'] = genepos
In [6]:
# Narrow view of the table for eyeballing: position, per-condition mean RPKM,
# the Vnt-vs-Plk fold change and the gene description.
df_readable = df.loc[:, [
    'gene_position',
    'Plk_rpkm_mean',
    'Vnt_rpkm_mean',
    'Vnt-vs-Plk_log2fc',
    'Description',
]]
In [7]:
# Genes most enriched in Plk (planktonic): most negative Vnt-vs-Plk log2
# fold change first; show the first rows.
df_readable.sort_values(by='Vnt-vs-Plk_log2fc', ascending=True).head()
Out[7]:
In [8]:
# Genes most enriched in Vnt (vented): largest Vnt-vs-Plk log2 fold change
# first; show the first rows.
df_readable.sort_values(by='Vnt-vs-Plk_log2fc', ascending=False).head()
Out[8]:
In [9]:
# Gene sets to highlight on the genome plot: display name -> list of gene IDs.
# The IDs are used as row labels into `df`, and the keys double as legend
# labels and as keys of `color_dict`.
gene_dict = {
'Phosphate': ['VF_A1087', 'VF_A1090', 'VF_1611', 'VF_A1057', 'VF_1610', 'VF_1613',
'VF_1612', 'VF_A1089'],
'Motility': ['VF_1851', 'VF_2079', 'VF_1842', 'VF_2317', 'VF_1843', 'VF_1863', 'VF_1841'],
'Lipid peroxidation': ['VF_1081', 'VF_1082', 'VF_1083', 'VF_A1049', 'VF_A1050'],
# NOTE(review): 'VF_A0924' appears twice in this list - confirm whether the
# second entry should be a different locus or simply be dropped.
'lux operon': ['VF_A0918', 'VF_A0919', 'VF_A0920', 'VF_A0924', 'VF_A0921', 'VF_A0922',
'VF_A0923', 'VF_A0924'],
'LuxI-regulated': ['VF_A0985', 'VF_1161', 'VF_1162', 'VF_1725', 'VF_A0090', 'VF_A0622',
'VF_A1058'],
'Host colonization': ['VF_0475', 'VF_A0487', 'VF_A0875', 'VF_A0874', 'VF_A0872', 'VF_A0870',
'VF_A0867', 'VF_A0866', 'VF_A0868', 'VF_A0869'],
'TMAO reductase': ['VF_A0188', 'VF_A0189'],
'Fat catabolism': ['VF_0533'],
'Amino acids': ['VF_1585', 'VF_1586', 'VF_A0840'],
'PTS sugars': ['VF_A0747', 'VF_A1189', 'VF_A0941', 'VF_A0942'],
'Non-PTS sugars': ['VF_A0799']}
In [10]:
# Use seaborn's "talk" plotting context for the figures drawn below.
sns.set_context("talk")
In [11]:
# Marker colour for each gene set: gene-set name (same keys as `gene_dict`)
# -> xkcd colour name, resolved through `sns.xkcd_rgb` when plotting.
# colors from http://xkcd.com/color/rgb/
color_dict = {
'Phosphate': 'blue',
'Motility': 'light blue',
'Lipid peroxidation': 'orange',
'lux operon': 'red',
'LuxI-regulated': 'pink',
'Host colonization': 'teal',
'TMAO reductase': 'bright blue',
'Fat catabolism': 'kelly green',
'Amino acids': 'goldenrod',
'PTS sugars': 'coral',
'Non-PTS sugars': 'carnation pink'}
In [12]:
# Line / annotation colours.
# `unity_color` is not referenced by any cell visible here; kept as defined.
unity_color = sns.xkcd_rgb['grey']
# Every condition code currently maps to black; the genome plot uses this for
# the "Enriched in ... cells" annotations.
xy_color_dict = {cond: sns.xkcd_rgb['black'] for cond in ('Plk', 'Swt', 'Vnt')}
In [13]:
# Condition code -> human-readable name, substituted into the
# "Enriched in ... cells" annotations of the genome plot.
name_lookup = {
'Plk': 'planktonic',
'Swt': 'SWT',
'Vnt': 'vented'}
In [14]:
# Single-panel genome plot of the Vnt-vs-Plk fold change with six highlighted
# gene sets, written to a PDF in the working directory.
fig = plt.figure(figsize=(16, 5))
# label=True: this panel stands alone, so it carries the x-axis label
# (the argument now states what the saved figure actually shows).
plot_log2fc_along_genome('Vnt-vs-Plk_log2fc', 'Vnt', 'Plk', True,
                         ['Phosphate', 'Motility', 'Lipid peroxidation',
                          'lux operon', 'LuxI-regulated', 'Host colonization'])
plt.savefig('genome_vnt_plk.pdf')
In [ ]: