notebook.community

Edit and run



In [1]:

    
import sys

# os.environ['R_LIBS_USER'] = '/project/projectdirs/metatlas/r_pkgs/'
curr_ld_lib_path = ''

# os.environ['LD_LIBRARY_PATH'] = curr_ld_lib_path + ':/project/projectdirs/openmsi/jupyterhub_libs/boost_1_55_0/lib' + ':/project/projectdirs/openmsi/jupyterhub_libs/lib'

# sys.path.insert(0, '/project/projectdirs/metatlas/python_pkgs/')
sys.path.insert(0,'/global/project/projectdirs/metatlas/anaconda/lib/python2.7/site-packages' )
sys.path.append('/global/project/projectdirs/openmsi/jupyterhub_libs/anaconda/lib/python2.7/site-packages')

from metatlas import metatlas_objects as metob
from metatlas import h5_query as h5q

import qgrid

from matplotlib import pyplot as plt
import pandas as pd
import os
import tables
import pickle
%matplotlib inline


import dill

import numpy as np

from rdkit import Chem
from rdkit.Chem import AllChem
from rdkit.Chem import Draw
# from rdkit.Chem.rdMolDescriptors import ExactMolWt
from rdkit.Chem import Descriptors
from rdkit.Chem import rdMolDescriptors
from rdkit.Chem import AllChem
from rdkit.Chem import Draw
from rdkit.Chem import rdDepictor
from rdkit.Chem.Draw import rdMolDraw2D
from rdkit.Chem.Draw import IPythonConsole
from IPython.display import SVG,display









    



:0: FutureWarning: IPython widgets are experimental and may change in the future.



In [57]:

    
# my_file = 'save_data_pos_exudates_only.pkl'
# my_file = 'save_data_neg_exudates_only.pkl'
# my_file = 'save_data_pos_cassettes.pkl'

my_file = '20160203_KBL-BC_Root-Exudate_Hilic_QExactive_Trial-Run_POS.pkl'
# my_file = 'save_data_neg_archetypes_hilic.pkl'
# my_file = 'save_data_pos_archetypes_hilic_istds.pkl'
# project_label = 'kate_exudates_metatlas_exudates_neg_only'
project_label = '20160203_KBL-BC_Root-Exudate_Hilic_QExactive_Trial-Run_POS'

with open(my_file,'r') as f:
    data = dill.load(f)



In [58]:

    
print data[1][1].keys()
print data[1][1]['data'].keys()









    



---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-58-06f5709d60c9> in <module>()
----> 1 print data[1][1].keys()
      2 print data[1][1]['data'].keys()

IndexError: list index out of range



In [59]:

    
data[0][0]['data']









    Out[59]:





{'eic': {'intensity': array([  36452.359375 ,   42779.6484375,   43072.5      ,   44875.3046875,
           51501.40625  ,   52883.90625  ,   43779.25     ,   46523.34375  ,
           52715.28125  ,   41145.25     ,   42183.5      ,   55746.8125   ,
           47116.6875   ,   51913.9375   ,   52764.9375   ,   42523.6875   ,
           46135.25     ,   58624.0625   ,   52808.5625   ,   51610.4375   ,
           56146.       ,   49009.5      ,   56028.875    ,   63760.25     ,
           68257.625    ,   68902.875    ,   60446.625    ,   72178.875    ,
           69075.125    ,   73171.25     ,   66146.375    ,   71876.125    ,
           61937.625    ,   66382.5      ,   65416.875    ,   60317.625    ,
           73414.375    ,   69989.375    ,   82281.       ,   72776.       ,
           65287.5      ,   76094.5      ,   79015.5      ,   80047.5      ,
           75834.       ,   80642.       ,   87679.5      ,   90043.75     ,
           84992.25     ,   93403.75     ,   90623.75     ,   96896.25     ,
           85095.75     ,   78321.75     ,   98112.75     ,   93813.25     ,
           90347.       ,   95051.       ,   93384.25     ,   88346.       ,
           92478.75     ,   87061.25     ,   85169.       ,   89928.       ,
           89845.       ,   89907.5      ,   93458.       ,   91719.5      ,
           85857.       ,   87537.       ,   91374.       ,  100284.5      ,
           95995.       ,   97828.5      ,  112024.       ,  120759.5      ,
          133676.       ,  139421.5      ,  159750.       ,  139535.5      ,
          159128.5      ,  144274.       ,  164378.       ,  197435.5      ,
          203189.5      ,  205582.       ,  244207.       ,  226116.       ,
          274359.       ,  249786.       ,  281508.       ,  270667.       ,
          295139.       ,  313385.       ,  382430.       ,  368380.       ,
          342982.       ,  403726.       ,  405651.       ,  433632.       ,
          403802.       ,  410474.       ,  373302.       ], dtype=float32),
  'polarity': 1,
  'rt': array([  9.50059223,   9.51037598,   9.52017784,   9.52996922,
           9.53978825,   9.54959297,   9.55937767,   9.56917191,
           9.57896805,   9.58877563,   9.59858036,   9.60837173,
           9.61816311,   9.62796307,   9.63776779,   9.64754295,
           9.65732574,   9.66711998,   9.67691517,   9.68672371,
           9.69654465,   9.70636749,   9.71615124,   9.7259388 ,
           9.73574257,   9.74552631,   9.75531387,   9.76511765,
           9.77492237,   9.78472996,   9.79455566,   9.80435753,
           9.81413841,   9.82393169,   9.83372593,   9.84350777,
           9.85328865,   9.86311531,   9.87292385,   9.88273907,
           9.89252377,   9.9023428 ,   9.91212654,   9.92193699,
           9.93173409,   9.94150734,   9.95131207,   9.96111393,
           9.97093201,   9.98071384,   9.99052143,  10.00032043,
          10.01012039,  10.019907  ,  10.02968693,  10.03948879,
          10.04926586,  10.05906963,  10.06885338,  10.07864475,
          10.08847618,  10.09825897,  10.10805702,  10.11783886,
          10.12761784,  10.13741016,  10.14718533,  10.1569891 ,
          10.16677761,  10.17657375,  10.1863842 ,  10.19618893,
          10.20597553,  10.21577168,  10.22555542,  10.23535824,
          10.24514103,  10.25493526,  10.26473331,  10.27452755,
          10.28432369,  10.29411221,  10.30390453,  10.31362629,
          10.32331753,  10.33291435,  10.34248543,  10.3520031 ,
          10.36151028,  10.37102413,  10.38057423,  10.39009285,
          10.39957047,  10.40915108,  10.41869736,  10.42828274,
          10.4378624 ,  10.44756031,  10.45726967,  10.4670372 ,
          10.4767971 ,  10.48659897,  10.49640083], dtype=float32)},
 'ms1_summary': {'mz_centroid': 104.07097,
  'mz_peak': 104.07101,
  'peak_area': 3432043.2,
  'peak_height': 100284.71,
  'polarity': 1,
  'rt_centroid': 10.011698,
  'rt_peak': 10.196189},
 'msms': {'data': [], 'most_intense_precursor': [], 'polarity': []}}



In [60]:

    
import re
export_group_names = []
for i,d in enumerate(data):
    newstr = d[0]['group'].name
    export_group_names.append(newstr)
# print export_group_names



In [61]:

    
import re
export_file_names = []
for i,d in enumerate(data):
    newstr = os.path.basename(d[0]['lcmsrun'].hdf5_file)
    export_file_names.append(newstr)
# print export_file_names



In [62]:

    
import re
from collections import defaultdict

export_compound_names = []
export_compound_objects = []
for i,d in enumerate(data[0]):
    export_compound_objects.append(d['identification'])
    if len(d['identification'].compound) > 0:
        str = d['identification'].compound[0].name
    else:
        str = d.name
    newstr = '%s_%s_%s_%5.2f'%(str,d['identification'].mz_references[0].detected_polarity,d['identification'].mz_references[0].adduct,d['identification'].rt_references[0].rt_peak)
    newstr = re.sub('\.', 'p', newstr) #2 or more in regexp

    newstr = re.sub('[\[\]]','',newstr)
    newstr = re.sub('[^A-Za-z0-9+-]+', '_', newstr)
    newstr = re.sub('i_[A-Za-z]+_i_', '', newstr)
    if newstr[0] == '_':
        newstr = newstr[1:]
    if newstr[0] == '-':
        newstr = newstr[1:]
    if newstr[-1] == '_':
        newstr = newstr[:-1]

    newstr = re.sub('[^A-Za-z0-9]{2,}', '', newstr) #2 or more in regexp
    export_compound_names.append(newstr)

#If duplicate compound names exist, then append them with a number
D = defaultdict(list)
for i,item in enumerate(export_compound_names):
    D[item].append(i)
D = {k:v for k,v in D.items() if len(v)>1}
for k in D.keys():
    for i,f in enumerate(D[k]):
        export_compound_names[f] = '%s%d'%(export_compound_names[f],i)

for i, e in enumerate (export_compound_names):
    print i,e









    



0 2-aminoisobutyrate_positive_M+H10p00
1 4-methylumbelliferone_positive_M+H1p50
2 5-aminopentanoate_positive_M+H12p10
3 2abscisate_positive_M+H1p40
4 2abscisate_positive_M+H2p00
5 adenine_positive_M+H4p30
6 L-alanine_positive_M+H11p10
7 L-arginine_positive_M+H21p10
8 L-asparagine_positive_M+H12p60
9 glycine_betaine_positive_M+H6p50
10 biotin_positive_M+H7p30
11 5-O-caffeoyl-D-quinate_positive_M+H12p00
12 choline_positive_M+H6p70
13 R4-hydroxymandelate_positive_M+H5p40
14 cytidine_positive_M+H7p60
15 D-mannitol_positive_M+H9p60
16 alphaD-fructopyranose_positive_M+H8p60
17 glycine_positive_M+H12p10
18 guanine_positive_M+H7p60
19 L-homoserine_positive_M+H11p70
20 indole-3-acetate_positive_M+H4p50
21 inosine_positive_M+H6p80
22 L-isoleucine_positive_M+H7p70
23 L-isoleucine_positive_M+H7p80
24 L-aspartate_positive_M+H14p000
25 L-aspartate_positive_M+H14p001
26 L-carnitine_positive_M+H9p50
27 L-citrulline_positive_M+H13p00
28 L-cystine_positive_M+H16p10
29 L-glutamate_positive_M+H13p70
30 4-hydroxy-D-proline_positive_M+H10p90
31 L-tyrosine_positive_M+H9p90
32 L-leucine_positive_M+H7p30
33 L-leucine_positive_M+H7p40
34 L-lysine_positive_M+H20p90
35 alphamaltose_positive_M+H12p20
36 alphamaltose_positive_M+H12p80
37 L-methionine_positive_M+H8p50
38 acetylalphaD-glucosamine_positive_M+H7p50
39 nicotinate_positive_M+H6p10
40 L-ornithine_positive_M+H21p30
41 palmitoleate_positive_M+H1p30
42 pantothenate_positive_M+H7p50
43 L-phenylalanine_positive_M+H7p30
44 L-proline_positive_M+H8p60
45 raffinose_positive_M+H14p20
46 alphaL-rhamnopyranose_positive_M+H5p70
47 riboflavin_positive_M+H4p80
48 sucrose_positive_M+H11p70
49 thiamin_positive_M+H8p30
50 L-threonine_positive_M+H11p40
51 L-tryptophan_positive_M+H8p40
52 uracil_positive_M+H3p00
53 uridine_positive_M+H4p90
54 L-valine_positive_M+H8p90
55 vanillin_positive_M+H1p40



In [63]:

    
def plot_chromatogram(d,file_name, ax=None):
    import numpy as np
    from textwrap import wrap
    if ax is None:
        ax = plt.gca()

    plt.rcParams['pdf.fonttype']=42
    plt.rcParams['pdf.use14corefonts'] = True
    plt.rcParams['text.usetex'] = False
    plt.rcParams.update({'font.size': 12})
    plt.rcParams.update({'font.weight': 'bold'})
    plt.rcParams['axes.linewidth'] = 2 # set the value globally

    rt_min = d['identification'].rt_references[0].rt_min
    rt_max = d['identification'].rt_references[0].rt_max
    rt_peak = d['identification'].rt_references[0].rt_peak
        
    if len(d['data']['eic']['rt']) > 0:
        x = d['data']['eic']['rt']
        y = d['data']['eic']['intensity']
        ax.plot(x,y,'k-',linewidth=2.0,alpha=1.0)  
        myWhere = np.logical_and(x>=rt_min, x<=rt_max )
        ax.fill_between(x,0,y,myWhere, facecolor='c', alpha=0.3)

    ax.axvline(rt_min, color='k',linewidth=2.0)
    ax.axvline(rt_max, color='k',linewidth=2.0)
    ax.axvline(rt_peak, color='r',linewidth=2.0)
#     ax.set_xlabel('Time (min)',weight='bold')
#     ax.set_ylabel('Intensity (au)',weight='bold')

    ax.set_title("\n".join(wrap(file_name,54)),fontsize=12,weight='bold')

def plot_all_chromatograms_all_files(data,nCols,export_file_names,export_compound_names,share_y,project_label):
    import time
    d = 'data/%s/chromatograms/'%project_label
    if not os.path.exists(d):
        os.makedirs(d)
    nRows = int(np.ceil(len(export_file_names)/float(nCols)))
    for compound_idx,compound in enumerate(export_compound_names):
        starttime = time.time()
        f, ax = plt.subplots(nRows, nCols, sharey=share_y,figsize=(8*nCols,nRows * 6)) #original 8 x 6
#         plt.tight_layout()
        print "figure created in ", time.time() - starttime, " seconds"
        ax = ax.flatten()
        parttime = time.time()
        for i,fname in enumerate(export_file_names):
            p = plot_chromatogram(data[i][compound_idx], fname, ax=ax[i])
        print i, "subplots created in ", time.time() - parttime, " seconds"
#         parttime = time.time()
#         print "tight layout is ", time.time() - parttime, " seconds"
        parttime = time.time()
        f.savefig('%s%s.pdf'%(d,export_compound_names[compound_idx]))
        print "time to save figure is ", time.time() - parttime, " seconds"
        parttime = time.time()
#         f.clear()
        plt.close('all')#f.clear()
        print "time to clear and close figure is ", time.time() - parttime, " seconds"
        print " "


def plot_all_compounds_for_each_file(data,nCols,export_file_names,export_compound_names,share_y,project_label):
    import time
    d = 'data/%s/one_file_chromatograms/'%project_label
    if not os.path.exists(d):
        os.makedirs(d)
    nRows = int(np.ceil(len(export_compound_names)/float(nCols)))
    for file_idx,my_file in enumerate(export_file_names):
        f, ax = plt.subplots(nRows, nCols, sharey=share_y,figsize=(8*nCols,nRows * 6)) #original 8 x 6
#         plt.tight_layout()
        ax = ax.flatten()
        for i,compound in enumerate(export_compound_names):
            p = plot_chromatogram(data[file_idx][i], compound, ax=ax[i])
        f.savefig('%s%s.pdf'%(d,export_file_names[file_idx]))
        plt.close('all')#f.clear()



In [64]:

    
""" contribution from Hans de Winter """
def _InitialiseNeutralisationReactions():
    patts= (
        # Imidazoles
        ('[n+;H]','n'),
        # Amines
        ('[N+;!H0]','N'),
        # Carboxylic acids and alcohols
        ('[$([O-]);!$([O-][#7])]','O'),
        # Thiols
        ('[S-;X1]','S'),
        # Sulfonamides
        ('[$([N-;X2]S(=O)=O)]','N'),
        # Enamines
        ('[$([N-;X2][C,N]=C)]','N'),
        # Tetrazoles
        ('[n-]','[nH]'),
        # Sulfoxides
        ('[$([S-]=O)]','S'),
        # Amides
        ('[$([N-]C=O)]','N'),
        )
    return [(Chem.MolFromSmarts(x),Chem.MolFromSmiles(y,False)) for x,y in patts]

_reactions=None
def NeutraliseCharges(mol, reactions=None):
    global _reactions
    if reactions is None:
        if _reactions is None:
            _reactions=_InitialiseNeutralisationReactions()
        reactions=_reactions
#     mol = Chem.MolFromSmiles(smiles)
    replaced = False
    for i,(reactant, product) in enumerate(reactions):
        while mol.HasSubstructMatch(reactant):
            replaced = True
            rms = AllChem.ReplaceSubstructs(mol, reactant, product)
            rms_smiles = Chem.MolToSmiles(rms[0])
            mol = Chem.MolFromSmiles(rms_smiles)
    if replaced:
        return (mol, True) #Chem.MolToSmiles(mol,True)
    else:
        return (mol, False)
def drawStructure_ShowingFragment(pactolus_tree,fragment_idx,myMol,myMol_w_Hs):

    drawer = rdMolDraw2D.MolDraw2DSVG(600,300)

    fragment_atoms = np.where(pactolus_tree[fragment_idx]['atom_bool_arr'])[0]
    mark_atoms_no_H = []
    for a_index in fragment_atoms:
        if myMol_w_Hs.GetAtomWithIdx(a_index).GetSymbol() != 'H':
            mark_atoms_no_H.append(a_index)

    rdDepictor.Compute2DCoords(myMol)

    drawer.DrawMolecule(myMol,highlightAtoms=mark_atoms_no_H)
    drawer.FinishDrawing()
    svg = drawer.GetDrawingText().replace('svg:','')
    return svg

def drawStructure_Fragment(pactolus_tree,fragment_idx,myMol,myMol_w_Hs):
    fragment_atoms = np.where(pactolus_tree[fragment_idx]['atom_bool_arr'])[0]
    depth_of_hit = np.sum(pactolus_tree[fragment_idx]['bond_bool_arr'])
    mol2 = deepcopy(myMol_w_Hs)
    # Now set the atoms you'd like to remove to dummy atoms with atomic number 0
    fragment_atoms = np.where(pactolus_tree[fragment_idx]['atom_bool_arr']==False)[0]
    for f in fragment_atoms:
        mol2.GetAtomWithIdx(f).SetAtomicNum(0)

    # Now remove dummy atoms using a query
    mol3 = Chem.DeleteSubstructs(mol2, Chem.MolFromSmarts('[#0]'))
    mol3 = Chem.RemoveHs(mol3)
    # You get what you are looking for
    return moltosvg(mol3),depth_of_hit


def moltosvg(mol,molSize=(450,150),kekulize=True):
    mc = Chem.Mol(mol.ToBinary())
    if kekulize:
        try:
            Chem.Kekulize(mc)
        except:
            mc = Chem.Mol(mol.ToBinary())
    if not mc.GetNumConformers():
        rdDepictor.Compute2DCoords(mc)
    drawer = rdMolDraw2D.MolDraw2DSVG(molSize[0],molSize[1])
    drawer.DrawMolecule(mc)
    drawer.FinishDrawing()
    svg = drawer.GetDrawingText()
    # It seems that the svg renderer used doesn't quite hit the spec.
    # Here are some fixes to make it work in the notebook, although I think
    # the underlying issue needs to be resolved at the generation step
    return svg.replace('svg:','')

def get_ion_from_fragment(frag_info,spectrum):
    hit_indices = np.where(np.sum(frag_info,axis=1))
    hit = spectrum[hit_indices,:][0]
    return hit,hit_indices



In [65]:

    
#plot msms and annotate
#compound name
#formula
#adduct
#theoretical m/z
#histogram of retention times
#scatter plot of retention time with peak area
#retention time
#print all chromatograms
#structure

def file_with_max_precursor_intensity(data,compound_idx):
    idx = []
    my_max = 0
    for i,d in enumerate(data):
        temp = d[compound_idx]['data']['msms']['data']
        if d[compound_idx]['data']['msms']['polarity'] or d[compound_idx]['data']['msms']['polarity'] ==0 :
            m = np.max(temp['precursor_intensity'])
            if m > my_max:
                my_max = m
                idx = i
    return idx,my_max


def make_identification_figure(data,file_idx,compound_idx,export_name,project_label):
    d = 'data/%s/identification/'%project_label
    if not os.path.exists(d):
        os.makedirs(d)
    fig = plt.figure(figsize=(20,20))
#     fig = plt.figure()
    ax = fig.add_subplot(211)
    ax.set_title(data[file_idx][compound_idx]['identification'].compound[0].name,fontsize=12,weight='bold')
    ax.set_xlabel('m/z',fontsize=12,weight='bold')
    ax.set_ylabel('intensity',fontsize=12,weight='bold')

    mz = data[file_idx][compound_idx]['data']['msms']['data']['mz']
    zeros = np.zeros(data[file_idx][compound_idx]['data']['msms']['data']['mz'].shape)
    intensity = data[idx][compound_idx]['data']['msms']['data']['i']
    ax.vlines(mz,zeros,intensity,colors='r',linewidth = 2)
    sx = np.argsort(intensity)[::-1]
    labels = [1.001e9]
    for i in sx:
        if np.min(np.abs(mz[i] - labels)) > 0.1 and intensity[i] > 0.02 * np.max(intensity):
            ax.annotate('%5.4f'%mz[i], xy=(mz[i], 1.01*intensity[i]),rotation = 90, horizontalalignment = 'center', verticalalignment = 'left')
            labels.append(mz[i])

    plt.tight_layout()
    L = plt.ylim()
    plt.ylim(L[0],L[1]*1.12)
   
    inchi =  data[file_idx][compound_idx]['identification'].compound[0].inchi
    myMol = Chem.MolFromInchi(inchi.encode('utf-8'))
    myMol,neutralised = NeutraliseCharges(myMol)
    image = Draw.MolToImage(myMol, size = (300,300) )
    ax2 = fig.add_subplot(223)
    ax2.imshow(image)
    ax2.axis('off')
    #     SVG(moltosvg(myMol))

    ax3 = fig.add_subplot(224)
    ax3.set_xlim(0,1)
    mz_theoretical = data[file_idx][compound_idx]['identification'].mz_references[0].mz
    mz_measured = data[file_idx][compound_idx]['data']['ms1_summary']['mz_centroid']
    if not mz_measured:
        mz_measured = 0

    delta_mz = abs(mz_theoretical - mz_measured)
    delta_ppm = delta_mz / mz_theoretical * 1e6
    
    rt_theoretical = data[file_idx][compound_idx]['identification'].rt_references[0].rt_peak
    rt_measured = data[file_idx][compound_idx]['data']['ms1_summary']['rt_centroid']
    if not rt_measured:
        rt_measured = 0
    ax3.text(0,1,'%s'%os.path.basename(data[file_idx][compound_idx]['lcmsrun'].hdf5_file),fontsize=12)
    ax3.text(0,0.95,'%s %s'%(data[file_idx][compound_idx]['identification'].compound[0].name, data[file_idx][compound_idx]['identification'].mz_references[0].adduct),fontsize=12)
    ax3.text(0,0.9,'m/z theoretical = %5.4f, measured = %5.4f, %5.4f ppm difference'%(mz_theoretical, mz_measured, delta_ppm),fontsize=12)
    ax3.text(0,0.85,'Expected Elution of %5.2f minutes, %5.2f min actual'%(rt_theoretical,rt_measured),fontsize=12)
    ax3.set_ylim(0.2,1.01)
    ax3.axis('off')
#     plt.show()
    fig.savefig('%sIdentifications_%s.pdf'%(d,export_name))
    fig.clear()
    plt.close('all')#f.clear()



In [66]:

    
inchi =  '%s'%data[idx][9]['identification'].compound[0].InChI
myMol = Chem.MolFromInchi(inchi.encode('utf-8'))
SVG(moltosvg(myMol))









    



---------------------------------------------------------------------------
AttributeError                            Traceback (most recent call last)
<ipython-input-66-b2296acbf8e2> in <module>()
----> 1 inchi =  '%s'%data[idx][9]['identification'].compound[0].InChI
      2 myMol = Chem.MolFromInchi(inchi.encode('utf-8'))
      3 SVG(moltosvg(myMol))

/global/project/projectdirs/metatlas/anaconda/lib/python2.7/site-packages/metatlas/metatlas_objects.pyc in __getattribute__(self, name)
    233         """Automatically resolve stubs on demand.
    234         """
--> 235         value = super(MetatlasObject, self).__getattribute__(name)
    236         if isinstance(value, Stub) and FETCH_STUBS:
    237             value = value.retrieve()

AttributeError: 'Compound' object has no attribute 'InChI'



In [67]:

    
# column name list
# row name list
import pandas as pd
import os
peak_height = pd.DataFrame( index=export_compound_names, columns=export_file_names, dtype=float)
peak_area = pd.DataFrame( index=export_compound_names, columns=export_file_names, dtype=float)

# peak_height['compound'] = compound_list
# peak_height.set_index('compound',drop=True)
for i,dd in enumerate(data):
    for j,d in enumerate(dd):
        if not d['data']['ms1_summary']['peak_height']:
            peak_height.ix[export_compound_names[j],export_file_names[i]] = 0
            peak_area.ix[export_compound_names[j],export_file_names[i]] = 0
        else:
            peak_height.ix[export_compound_names[j],export_file_names[i]] = d['data']['ms1_summary']['peak_height']  
            peak_area.ix[export_compound_names[j],export_file_names[i]] = d['data']['ms1_summary']['peak_area']
columns = []
for i,f in enumerate(export_file_names):
    columns.append((export_group_names[i],f))
peak_height.columns = pd.MultiIndex.from_tuples(columns,names=['group', 'file'])
peak_area.columns = pd.MultiIndex.from_tuples(columns,names=['group', 'file'])



In [68]:

    
d = 'data/%s/sheets/'%project_label
if not os.path.exists(d):
    os.makedirs(d)
peak_height.to_csv('data/%s/sheets/compounds peak height.tab'%project_label,sep='\t')
peak_area.to_csv('data/%s/sheets/compounds peak area.tab'%project_label,sep='\t')



In [69]:

    
from matplotlib import pyplot as plt

def plot_errorbar_plots(df,compound_list,project_label):
    d = 'data/%s/error_bar/'%project_label
    if not os.path.exists(d):
        os.makedirs(d)
    for compound in compound_list:
        m = df.ix[compound].groupby(level='group').mean()
        e = df.ix[compound].groupby(level='group').std()
        c = df.ix[compound].groupby(level='group').count()

        for i in range(len(e)):
            if c[i]>0:
                e[i] = e[i] / c[i]**0.5

        f, ax = plt.subplots(1, 1,figsize=(20,12))
        m.plot(yerr=e, kind='bar',ax=ax)
        ax.set_title(compound,fontsize=12,weight='bold')
        plt.tight_layout()
        f.savefig('%sErrorBar_%s.pdf'%(d,compound))
        f.clear()
        plt.close('all')#f.clear()



In [70]:

    
idx, m = file_with_max_precursor_intensity(data,compound_idx)
print idx 
print m









    



0
2.31519e+06



In [51]:

    
for compound_idx in range(len(export_compound_names)):
    idx, m = file_with_max_precursor_intensity(data,compound_idx)
    if m:
        make_identification_figure(data,idx,compound_idx,export_compound_names[compound_idx],project_label)

plot_errorbar_plots(peak_height,export_compound_names,project_label)

nCols = 2
compound_idx = 9
share_y=True
t = plot_all_chromatograms_all_files(data,nCols,export_file_names,export_compound_names,share_y,project_label)









    



---------------------------------------------------------------------------
IndexError                                Traceback (most recent call last)
<ipython-input-51-f415e110b535> in <module>()
     13 nCols = 8
     14 share_y=False
---> 15 t = plot_all_compounds_for_each_file(data,nCols,export_file_names,export_compound_names,share_y,project_label)
     16 

<ipython-input-50-96a5af93de43> in plot_all_compounds_for_each_file(data, nCols, export_file_names, export_compound_names, share_y, project_label)
     70         ax = ax.flatten()
     71         for i,compound in enumerate(export_compound_names):
---> 72             p = plot_chromatogram(data[file_idx][i], compound, ax=ax[i])
     73         f.savefig('%s%s.pdf'%(d,export_file_names[compound_idx]))
     74         plt.close('all')#f.clear()

IndexError: index 8 is out of bounds for axis 0 with size 8



In [71]:

    
nCols = 8
share_y=False
t = plot_all_compounds_for_each_file(data,nCols,export_file_names,export_compound_names,share_y,project_label)



In [ ]: