Plotting Spectra

We are plotting fluorescence assay spectra collected according to this lab-protocols protocol. Further details of the specific experiment can be found in the fluorescence assay lab notebook on December 15-18, 2015.


In [1]:
from lxml import etree
import pandas as pd
import matplotlib as mpl
import matplotlib.cm as cm
import matplotlib.pyplot as plt
import seaborn as sns
from glob import glob
%matplotlib inline

Here each set of files is for a specific protein on one 96 well plate, and each file is a set of two rows that corresponds to a gradient of one ligand, with the first row being 1 uM of the protein and the second row containing only buffer and the ligand. This an example of a single plate (containing 1 uM of one type of protein in every other well):

Loading in files and defining proteins and ligands and their order in the data:


In [2]:
file_set = {'Src': glob("2015-12-15/*.xml"),
        'SrcGK': glob("2015-12-16/*.xml"),
        'AblGK': glob("2015-12-17/*.xml"),
        'Abl': glob("2015-12-18/*.xml")}
ligands = ['Bosutinib','Bosutinib Isomer','Erlotinib','Gefitinib']

In [3]:
#Src = glob("2015-12-15/*.xml")
#SrcGK = glob("2015-12-16/*.xml")
#AblGK = glob("2015-12-17/*.xml")
#Abl = glob("2015-12-18/*.xml")
#files = [Src,SrcGK,Abl,AblGK]

In [5]:
#This function allows us to import xml format data files and convert them to a pandas dataframe
def xml2df(file):

    root = etree.parse(file)

    data = []

    reads = root.xpath("/*/Section[1]/*/Well")

    wellIDs = [read.attrib['Pos'] for read in reads]

    data = [(s.text, float(s.attrib['WL']), r.attrib['Pos'])
        for r in reads
        for s in r]

    dataframe = pd.DataFrame(data, columns=['fluorescence','wavelength (nm)','Well'])
            
    ### dataframe_rep replaces 'OVER' (when fluorescence signal maxes out) with '3289277', an arbitrarily high number

    dataframe_rep = dataframe.replace({'OVER':'3289277'})

    dataframe_rep[['fluorescence']] = dataframe_rep[['fluorescence']].astype('float')
            
    dataframe_pivot = pd.pivot_table(dataframe_rep, index = 'wavelength (nm)', columns = ['Well'])
    
    #Rearrange columns so they're in the right order
    cols =  dataframe_pivot['fluorescence'].columns.tolist()
    cols = [cols[0]] + cols[4:11] + cols[1:4] + [cols[12]] + cols[16:23] + cols[13:16]
    dataframe_reindex =  dataframe_pivot.reindex_axis(cols,level='Well',axis=1)
    
    return dataframe_reindex

In [6]:
dataframe = xml2df(Src[0])

In [7]:
dataframe.head()


Out[7]:
fluorescence
Well A1 A2 A3 A4 A5 A6 A7 A8 A10 A11 ... B2 B3 B4 B5 B6 B7 B8 B10 B11 B12
wavelength (nm)
280 3289277 3289277 3289277 3289277 3289277 3289277 3289277 3289277 3289277 3289277 ... 3289277 3289277 3289277 3289277 3289277 3289277 3289277 3289277 3289277 3289277
285 3289277 3289277 3289277 3289277 3289277 3289277 3289277 3289277 3289277 3289277 ... 3289277 3289277 3289277 3289277 3289277 3289277 3289277 3289277 3289277 3289277
290 3289277 3289277 3289277 3289277 3289277 3289277 3289277 3289277 3289277 3289277 ... 3289277 3289277 3289277 3289277 3289277 3289277 3289277 3289277 3289277 3289277
295 3289277 3289277 3289277 3289277 3289277 3289277 3289277 3289277 3289277 3289277 ... 3289277 3289277 3289277 3289277 3289277 3289277 3289277 3289277 3289277 3289277
300 3289277 3289277 3289277 3289277 3289277 3289277 3289277 3289277 3289277 3289277 ... 3289277 3289277 3289277 3289277 3289277 3289277 3289277 3289277 3289277 3289277

5 rows × 22 columns


In [8]:
#This function allows us to plot spectra
def plot_spectra_grid(file_set,protein,ligands,ligand):
    grid = len(protein) + len(ligand)
    
    # pick the correct file
    proteins = file_set.keys()
    index = ligands.index(ligand)
    file = file_set[protein][index]
    
    # pick a title
    title = "%s - %s" %(protein, ligand)
    
    df = xml2df(file)
    
    # plot the spectra
    fig = plt.figure();
    ax = df['fluorescence'].iloc[:,12].plot(ylim=(0,100000),legend=False, linewidth=4,color='m');
    ax.axvline(x=480,color='0.7',linestyle='--');
    for i in range(11):
        #s = df['fluorescence'].iloc[:,i].plot(ylim=(0,100000),linewidth=3,c=cm.hsv(i*15), ax = ax, title=title);
        df['fluorescence'].iloc[:,i].plot(ylim=(0,100000),linewidth=3,c=cm.hsv(i*15), ax = ax, title=title);
        df['fluorescence'].iloc[:,11+i].plot(ylim=(0,100000),legend=False, linewidth=4,c=cm.gray(i*15+50), ax = ax);
    
    # The s = line above and this an attempt at making a color bar that so far has not worked
    #cbar = plt.colorbar(mappable=s, ax=ax)
    #cbar.set_label('My Label')

In [9]:
#You can use it to generate a single plot
plot_spectra_grid(file_set,'Src',ligands,'Bosutinib')



In [10]:
# or eventually it will also give a matrix of plots

In [11]:
#this function allows us to plot the saturation curve at a single wavelength of the spectra
def plot_spectra2singlet(file_set,ligands,wavelength):
    
    fig, axes = plt.subplots(nrows=len(file_set), ncols=4, figsize=(22,22))
    
    proteins = file_set.keys()
    
    for j,protein in enumerate(file_set):
    
        files = file_set[protein]
        print file_set[protein]
    
        for i in range(len(files)):
        
            #Extract data from the xml file and make a dataframe
            df = xml2df(files[i])

            hardcode = wavelength #nm
     
            # This plots things.
            df.loc[hardcode][0:11].plot(ax = axes[j,i], xticks=[],linewidth=4)
            df.loc[hardcode][11:23].plot(ax = axes[j,i], xticks=[],linewidth=4,title ='%s - %s' %(proteins[j], ligands[i]))
        plt.text(4,20000,'wavelength %s nm'%hardcode,fontsize=20)

In [12]:
#This works for both a full set of proteins
plot_spectra2singlet(file_set,ligands,480)


['2015-12-17/AblD382N-T334I_Bos_20151217_bw2020_gain120_120553.xml', '2015-12-17/AblD382N-T334I_BosI_20151217_bw2020_gain120123031.xml', '2015-12-17/AblD382N-T334I_Erl_20151217_bw2020_gain120_125515.xml', '2015-12-17/AblD382N-T334I_Gef_20151217_bw2020_gain120_132641.xml']
['2015-12-15/Src_Bos_20151215_bw2020_gain120_161211.xml', '2015-12-15/Src_BosI_20151215_bw2020_gain120_163633.xml', '2015-12-15/Src_Erl_20151215_bw2020_gain120_170056.xml', '2015-12-15/Src_Gef_20151215_bw2020_gain120_172518.xml']
['2015-12-18/AblD382N_Bos_20151218_bw2020_gain120_ 14-41-14_plate_1.xml', '2015-12-18/AblD382N_BosI_20151218_bw2020_gain12014-55-55_plate_1.xml', '2015-12-18/AblD382N_Erl_20151218_bw2020_gain120 15-11-11_plate_1.xml', '2015-12-18/AblD382N_Gef_20151218_bw2020_gain120 15-25-58_plate_1.xml']
['2015-12-16/SrcT338I_Bos_20151216_bw2020_gain120_152505.xml', '2015-12-16/SrcT338I_BosI_20151216_bw2020_gain120_154934.xml', '2015-12-16/SrcT338I_Erl_20151216_bw2020_gain120_161404.xml', '2015-12-16/SrcT338I_Gef_bw2020_gain120_20151216_164154.xml']

In [13]:
# or just one protein
#  This actually doesn't work for some reason
#Src = {'Src': glob("2015-12-15/*.xml")}
#plot_spectra2singlet(Src,ligands,480)
#  ERROR
#  too many indices for array
#  ---> 21             df.loc[hardcode][0:11].plot(ax = axes[j,i], xticks=[],linewidth=4)

In [ ]: