Package imports



In [1]:

    
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from matplotlib import rcParams
import seaborn as sns
from sys import argv
from collections import OrderedDict

Functions



In [2]:

    
def plot_log2fc_along_genome(log2fc, cond1, cond2, label, gene_set):
    """Draw a log2 fold-change profile along the genome on the current axes.

    The function draws with the pyplot state machine, so the caller is
    expected to create the figure beforehand and save/show it afterwards.
    It reads the module-level globals ``df``, ``gene_dict``, ``color_dict``,
    ``name_lookup`` and ``xy_color_dict``; they must be defined before the
    call.

    Parameters
    ----------
    log2fc : str
        Name of the column in ``df`` holding the log2 fold-change values.
    cond1, cond2 : str
        Condition codes; keys of ``name_lookup`` and ``xy_color_dict``.
        ``cond1`` is annotated in the upper half of the plot and ``cond2``
        in the lower half; both appear in the y-axis label.
    label : bool
        When truthy, the x-axis label is drawn; when falsy, it is omitted.
    gene_set : iterable of str
        Pathway names (keys of ``gene_dict`` and ``color_dict``) whose genes
        are highlighted as coloured markers and listed in the legend.

    Returns
    -------
    None
    """
    # Backbone: one black line through every gene, in 'gene_position' order.
    plt.plot(df['gene_position'], df[log2fc], color='k', linewidth=1)

    # Highlight the genes of each requested pathway on top of the line.
    for pathway in gene_set:
        genes = gene_dict[pathway]
        # Single-step .loc lookup instead of chained df[col][labels] indexing.
        plt.scatter(df.loc[genes, 'gene_position'], df.loc[genes, log2fc],
                    s=80, edgecolor='black', facecolor=sns.xkcd_rgb[color_dict[pathway]],
                    label=pathway, zorder=10)

    # Legend laid out as a single expanded row just above the axes.
    plt.legend(bbox_to_anchor=(0., 1.02, 1., .102), loc=3,
               ncol=7, mode='expand', borderaxespad=0, fontsize=14, handletextpad=0.1)

    # Annotate which condition each half of the y-axis corresponds to.
    plt.text(100, 7.5, 'Enriched in %s cells' % name_lookup[cond1], size=14,
             color=xy_color_dict[cond1], verticalalignment='center')
    plt.text(100, -7.5, 'Enriched in %s cells' % name_lookup[cond2], size=14,
             color=xy_color_dict[cond2], verticalalignment='center')

    # Axis labels, limits and ticks.
    # The x-label is drawn only on request; previously a trailing unconditional
    # plt.xlabel(...) call made the `label` argument have no effect.
    if label:
        plt.xlabel('Position along Vibrio fischeri ES114 genome (gene 1 to 3693)')
    plt.ylabel('%s / %s, log2(fc)' % (cond1, cond2))
    plt.xlim([-100, len(df.index)+100])
    plt.ylim([-10, 10])
    plt.yticks([-9, -6, -3, 0, 3, 6, 9])

Import data



In [3]:

    
# Location of the input CSV, relative to the notebook's working directory.
path = '../../data/results_rpkm.csv'



In [4]:

    
# Load the table at `path`; the first CSV column becomes the row index
# (the rendered outputs below show that index named VF_number).
df = pd.read_csv(path, index_col=0)



In [5]:

    
# Number the rows 0..N-1 in file order and store that ordinal as the
# x-coordinate used when plotting along the genome.
genepos = np.arange(df.shape[0])
df['gene_position'] = genepos



In [6]:

    
# Narrow view of the table holding only the columns worth reading by eye.
df_readable = df.loc[:, ['gene_position',
                         'Plk_rpkm_mean',
                         'Vnt_rpkm_mean',
                         'Vnt-vs-Plk_log2fc',
                         'Description']]



In [7]:

    
# Five genes with the lowest Vnt-vs-Plk log2 fold-change,
# i.e. the ones most enriched in Plk.
df_readable.sort_values('Vnt-vs-Plk_log2fc').head(5)









    Out[7]:







  
    
      
      gene_position
      Plk_rpkm_mean
      Vnt_rpkm_mean
      Vnt-vs-Plk_log2fc
      Description
    
    
      VF_number
      
      
      
      
      
    
  
  
    
      VF_A1087
      3552
      531.175848
      1.756022
      -8.728763
      ABC transporter substrate-binding protein
    
    
      VF_A1090
      3555
      89.540751
      0.334746
      -7.652550
      ABC transporter permease protein
    
    
      VF_A0487
      2985
      1450.252227
      20.915594
      -6.681156
      outer membrane protein U paralog%2C OmpU2
    
    
      VF_1611
      1560
      773.211006
      18.003958
      -5.916573
      phosphate ABC transporter%2C permease protein ...
    
    
      VF_A1057
      3525
      803.798728
      25.547421
      -5.715755
      phosphatase



In [8]:

    
# Five genes with the highest Vnt-vs-Plk log2 fold-change,
# i.e. the ones most enriched in Vnt (descending sort).
df_readable.sort_values('Vnt-vs-Plk_log2fc', ascending=False).head(5)









    Out[8]:







  
    
      
      gene_position
      Plk_rpkm_mean
      Vnt_rpkm_mean
      Vnt-vs-Plk_log2fc
      Description
    
    
      VF_number
      
      
      
      
      
    
  
  
    
      VF_A0623
      3109
      3.867351
      1874.936492
      6.695796
      hypothetical protein
    
    
      VF_2603
      2487
      0.150680
      158.316022
      5.704440
      trp operon leader peptide
    
    
      VF_1082
      1042
      12.870603
      1183.897230
      5.579681
      glutathione S-transferase
    
    
      VF_A0701
      3186
      1.871989
      358.119232
      5.543864
      transcriptional regulator
    
    
      VF_A0920
      3393
      24.226062
      3355.125961
      5.506721
      luciferase beta chain LuxB

Set what to plot



In [9]:

    
# Pathway name -> list of gene identifiers to highlight on the genome plot.
# The identifiers are used as row labels into `df` by the plotting function,
# so each one must be present in the table's index.
# Each identifier is listed once per pathway ('VF_A0924' used to appear twice
# under 'lux operon', which drew that marker twice).
gene_dict = {
    'Phosphate': ['VF_A1087', 'VF_A1090', 'VF_1611', 'VF_A1057', 'VF_1610', 'VF_1613', 
                  'VF_1612', 'VF_A1089'],
    'Motility': ['VF_1851', 'VF_2079', 'VF_1842', 'VF_2317', 'VF_1843', 'VF_1863', 'VF_1841'],
    'Lipid peroxidation': ['VF_1081', 'VF_1082', 'VF_1083', 'VF_A1049', 'VF_A1050'],
    'lux operon': ['VF_A0918', 'VF_A0919', 'VF_A0920', 'VF_A0921', 'VF_A0922', 
                   'VF_A0923', 'VF_A0924'],
    'LuxI-regulated': ['VF_A0985', 'VF_1161', 'VF_1162', 'VF_1725', 'VF_A0090', 'VF_A0622', 
                       'VF_A1058'],
    'Host colonization': ['VF_0475', 'VF_A0487', 'VF_A0875', 'VF_A0874', 'VF_A0872', 'VF_A0870', 
                          'VF_A0867', 'VF_A0866', 'VF_A0868', 'VF_A0869'],    
    'TMAO reductase': ['VF_A0188', 'VF_A0189'],
    'Fat catabolism': ['VF_0533'],
    'Amino acids': ['VF_1585', 'VF_1586', 'VF_A0840'],
    'PTS sugars': ['VF_A0747', 'VF_A1189', 'VF_A0941', 'VF_A0942'],
    'Non-PTS sugars': ['VF_A0799']}

Plot formatting



In [10]:

    
# Apply seaborn's "talk" plotting context to the figures drawn below.
sns.set_context("talk")



In [11]:

    
# Marker colour for each pathway, given as an xkcd colour name
# (names from http://xkcd.com/color/rgb/, resolved through sns.xkcd_rgb).
color_dict = dict([
    ('Phosphate', 'blue'),
    ('Motility', 'light blue'),
    ('Lipid peroxidation', 'orange'),
    ('lux operon', 'red'),
    ('LuxI-regulated', 'pink'),
    ('Host colonization', 'teal'),
    ('TMAO reductase', 'bright blue'),
    ('Fat catabolism', 'kelly green'),
    ('Amino acids', 'goldenrod'),
    ('PTS sugars', 'coral'),
    ('Non-PTS sugars', 'carnation pink'),
])



In [12]:

    
# xy line colors
unity_color = sns.xkcd_rgb['grey']
xy_color_dict = {'Plk': sns.xkcd_rgb['black'],
                 'Swt': sns.xkcd_rgb['black'],
                 'Vnt': sns.xkcd_rgb['black']}



In [13]:

    
# Condition code -> human-readable name used in the plot annotations.
name_lookup = dict(
    Plk='planktonic',
    Swt='SWT',
    Vnt='vented',
)

Genome plot



In [14]:

    
# Wide canvas for the genome-length profile; the plotting helper draws on
# the current figure, which is then written out as a PDF.
fig = plt.figure(figsize=(16, 5))
plot_log2fc_along_genome(
    log2fc='Vnt-vs-Plk_log2fc',
    cond1='Vnt',
    cond2='Plk',
    label=False,
    gene_set=[
        'Phosphate',
        'Motility',
        'Lipid peroxidation',
        'lux operon',
        'LuxI-regulated',
        'Host colonization',
    ],
)
plt.savefig('genome_vnt_plk.pdf')



In [ ]:

	gene_position	Plk_rpkm_mean	Vnt_rpkm_mean	Vnt-vs-Plk_log2fc	Description
VF_number
VF_A1087	3552	531.175848	1.756022	-8.728763	ABC transporter substrate-binding protein
VF_A1090	3555	89.540751	0.334746	-7.652550	ABC transporter permease protein
VF_A0487	2985	1450.252227	20.915594	-6.681156	outer membrane protein U paralog%2C OmpU2
VF_1611	1560	773.211006	18.003958	-5.916573	phosphate ABC transporter%2C permease protein ...
VF_A1057	3525	803.798728	25.547421	-5.715755	phosphatase

	gene_position	Plk_rpkm_mean	Vnt_rpkm_mean	Vnt-vs-Plk_log2fc	Description
VF_number
VF_A0623	3109	3.867351	1874.936492	6.695796	hypothetical protein
VF_2603	2487	0.150680	158.316022	5.704440	trp operon leader peptide
VF_1082	1042	12.870603	1183.897230	5.579681	glutathione S-transferase
VF_A0701	3186	1.871989	358.119232	5.543864	transcriptional regulator
VF_A0920	3393	24.226062	3355.125961	5.506721	luciferase beta chain LuxB