In [1]:
%matplotlib inline
In [2]:
# vibrio_plot_scatter.py
# Package imports
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import rcParams
import seaborn as sns
import ast
from matplotlib import rc
#from sys import argv
# Specify what to plot
xaxis = 'Plk' # argv[1]
yaxis = 'Vnt' # argv[2]
xlabel = 'Planktonic cells (RPKM)' # argv[3]
ylabel = 'Vented cells (RPKM)' # argv[4]
axis_limits = [1e-2, 1e6, 1e-2, 1e6]
pathway_csv = 'Phosphate,Flagellar,LipidPerox,LuxOperon,LuxIregulated,HostColonization' # argv[5]
gene_dict = {
'Phosphate': ['VF_A1087', 'VF_A1090', 'VF_1611', 'VF_A1057', 'VF_1610', 'VF_1613',
'VF_1612', 'VF_A1089'],
'Flagellar': ['VF_1851', 'VF_2079', 'VF_1842', 'VF_2317', 'VF_1843', 'VF_1863', 'VF_1841'],
'LipidPerox': ['VF_1081', 'VF_1082', 'VF_1083', 'VF_A1049', 'VF_A1050'],
'LuxOperon': ['VF_A0918', 'VF_A0919', 'VF_A0920', 'VF_A0924', 'VF_A0921', 'VF_A0922',
'VF_A0923', 'VF_A0924'],
'LuxIregulated': ['VF_A0985', 'VF_1161', 'VF_1162', 'VF_1725', 'VF_A0090', 'VF_A0622',
'VF_A1058'],
'HostColonization': ['VF_0475', 'VF_A0487', 'VF_A0875', 'VF_A0874', 'VF_A0872', 'VF_A0870',
'VF_A0867', 'VF_A0866', 'VF_A0868', 'VF_A0869'],
'TMAOreductase': ['VF_A0188', 'VF_A0189'],
'FatCatabolism': ['VF_0533'],
'AminoAcid': ['VF_1585', 'VF_1586', 'VF_A0840'],
'PTSsugars': ['VF_A0747', 'VF_A1189', 'VF_A0941', 'VF_A0942'],
'NonPTSsugars': ['VF_A0799']
}
coords_dict = {
'Phosphate': [3e3, 7e1],
'Flagellar': [1.5e2, 8e0],
'LipidPerox': [2e-1, 2.9e2],
'LuxOperon': [1e1, 8e3],
'LuxIregulated': [1.5e-1, 1e3],
'HostColonization': [1.5e-2, 8.5e0],
'TMAOreductase': [1e1, 1e4],
'FatCatabolism': [1e1, 1e4],
'AminoAcid': [1e1, 1e4],
'PTSsugars': [1e1, 1e4],
'NonPTSsugars': [1e1, 1e4]
}
name_dict = {
'Phosphate': 'Phosphate',
'Flagellar': 'Motility',
'LipidPerox': 'Lipid\nperoxidation',
'LuxOperon': 'lux operon',
'LuxIregulated': 'LuxI-regulated',
'HostColonization': 'Host\ncolonization',
'TMAOreductase': 'TMAO reductase',
'FatCatabolism': 'Fat catabolism',
'AminoAcid': 'Amino acids',
'PTSsugars': 'PTS sugars',
'NonPTSsugars': 'Non-PTS sugars'
}
name_lookup = {
'Plk': 'planktonic',
'Swt': 'SWT',
'Vnt': 'vented'}
# colors from http://xkcd.com/color/rgb/
# pathway colors
color_csv = 'blue,light blue,orange,red,pink,teal' # argv[6]
# xy line colors
unity_color = sns.xkcd_rgb['grey']
xy_color_dict = {'Plk': sns.xkcd_rgb['grey'],
'Swt': sns.xkcd_rgb['grey'],
'Vnt': sns.xkcd_rgb['grey']}
gene_labels = 'False' # argv[7]
gene_labels = ast.literal_eval(gene_labels)
# ### Import data
path = '../../data/results_rpkm.csv' # argv[8]
df = pd.read_csv(path, index_col=0)
# ### Plot formatting
# Seaborn style and context
sns.set_context("talk")
sns.set_style("white")
sns.set_style("ticks", {"xtick.major.size": 8, "ytick.major.size": 8})
# ### Scatter plot
# specify column names
xvar = xaxis + '_rpkm_mean'
yvar = yaxis + '_rpkm_mean'
# specify reference lines
x = [1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6]
x_pos3 = [a * 8.0 for a in x]
x_pos2 = [a * 4.0 for a in x]
x_pos1 = [a * 2.0 for a in x]
x_neg3 = [a * 0.125 for a in x]
x_neg2 = [a * 0.250 for a in x]
x_neg1 = [a * 0.500 for a in x]
# reference lines and labels for log2fc = -3, -2, -1, 0, 1, 2, 3
# (identified DEGs as p-value < 0.001 and log2FoldChange > 3.0)
fig = plt.figure(figsize=(8,8))
plt.plot(x, x, zorder=0, color=unity_color)
plt.plot(x, x_pos3, zorder=0, color=xy_color_dict[yaxis], linewidth=1)
plt.plot(x, x_pos2, zorder=0, color=xy_color_dict[yaxis], linewidth=1)
plt.plot(x, x_pos1, zorder=0, color=xy_color_dict[yaxis], linewidth=1)
plt.plot(x, x_neg3, zorder=0, color=xy_color_dict[xaxis], linewidth=1)
plt.plot(x, x_neg2, zorder=0, color=xy_color_dict[xaxis], linewidth=1)
plt.plot(x, x_neg1, zorder=0, color=xy_color_dict[xaxis], linewidth=1)
plt.text(4e3, 2e5, 'Enriched in\n%s cells' % name_lookup[yaxis], fontsize=14,
horizontalalignment='center', color=xy_color_dict[yaxis])
plt.text(5e5*0.125, 5e5, '8x', fontsize=14, horizontalalignment='center',
verticalalignment='center', color=xy_color_dict[yaxis],
bbox={'facecolor':'white', 'edgecolor':'none', 'pad':1})
plt.text(5e5*0.250, 5e5, '4x', fontsize=14, horizontalalignment='center',
verticalalignment='center', color=xy_color_dict[yaxis],
bbox={'facecolor':'white', 'edgecolor':'none', 'pad':1})
plt.text(5e5*0.500, 5e5, '2x', fontsize=14, horizontalalignment='center',
verticalalignment='center', color=xy_color_dict[yaxis],
bbox={'facecolor':'white', 'edgecolor':'none', 'pad':1})
plt.text(2e5, 8e2, 'Enriched in\n%s\ncells' % name_lookup[xaxis], fontsize=14,
horizontalalignment='center', color=xy_color_dict[xaxis])
plt.text(5e5, 5e5*0.125, '8x', fontsize=14, horizontalalignment='center',
verticalalignment='center', color=xy_color_dict[xaxis],
bbox={'facecolor':'white', 'edgecolor':'none', 'pad':1})
plt.text(5e5, 5e5*0.250, '4x', fontsize=14, horizontalalignment='center',
verticalalignment='center', color=xy_color_dict[xaxis],
bbox={'facecolor':'white', 'edgecolor':'none', 'pad':1})
plt.text(5e5, 5e5*0.500, '2x', fontsize=14, horizontalalignment='center',
verticalalignment='center', color=xy_color_dict[xaxis],
bbox={'facecolor':'white', 'edgecolor':'none', 'pad':1})
# main plot
plt.scatter(df[xvar], df[yvar], s=80, edgecolor='black', facecolor='none')
plt.xscale('log')
plt.yscale('log')
plt.xlabel(xlabel)
plt.ylabel(ylabel)
plt.axis(axis_limits)
# highlight genes
pathway_list = pathway_csv.split(',')
pathway_color = color_csv.split(',')
for i in range(len(pathway_list)):
# get VF list from pathway
gene_list = gene_dict[pathway_list[i]]
# gene colors
gene_color = sns.xkcd_rgb[pathway_color[i]]
# plot genes in pathway
plt.scatter(df[xvar][gene_list], df[yvar][gene_list], s=80, edgecolor='black',
facecolor=gene_color)
# add gene labels if requested
if gene_labels:
labels_string = 'labels'
for gene in gene_list:
plt.text(df[xvar][gene], df[yvar][gene], gene)
else:
labels_string = 'nolabels'
# add pathway labels
coords_list = coords_dict[pathway_list[i]]
plt.text(coords_list[0], coords_list[1], name_dict[pathway_list[i]], fontsize=14,
color=sns.xkcd_rgb[pathway_color[i]], horizontalalignment='left', verticalalignment='center')
# save pdf
gene_set = '_'.join(pathway_list)
plt.savefig('%s_%s_%s_%s.pdf' % (xaxis, yaxis, gene_set, labels_string))
In [ ]: