In [ ]:
# 1) Convert files for Metatlas 2.0 (using mzCAT)
# 2) Transfer to NERSC (using mzCAT)
# 3) Make MetAtlas Groups for your files
# 4) Enter your internal standards into an Atlas
# 5) Extract data from each of your files for the internal standards
# 6) Export the results in a meaningful way

# If all goes according to plan, on Thursday we will:
# 1) Build a small Atlas of your favorite molecules that have been identified.
# 2) Extract data from each of your files for these molecules
# 3) Export the results in a meaningful way

In [1]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import sys,os,glob,csv
import numpy as np
from matplotlib import pylab as plt

from scipy.spatial.distance import pdist
from scipy.cluster.hierarchy import linkage
from scipy.cluster.hierarchy import fcluster
from scipy.spatial.distance import squareform

sys.path.insert(0,'/global/project/projectdirs/metatlas/anaconda/lib/python2.7/site-packages' )
from metatlas import metatlas_objects as metob
from metatlas import h5_query

# Extend LD_LIBRARY_PATH with the two OpenMSI jupyterhub_libs directories.
# Whatever is already set is kept, and empty entries are dropped: a leading
# ':' produces an empty search-path entry, which the dynamic loader treats as
# the current working directory.
# NOTE(review): changing LD_LIBRARY_PATH from inside a running process
# presumably only affects child processes, not libraries this kernel loads
# itself -- confirm that the rdkit import below actually depends on it.
curr_ld_lib_path = os.environ.get('LD_LIBRARY_PATH', '')
openmsi_lib_dirs = [
    '/project/projectdirs/openmsi/jupyterhub_libs/boost_1_55_0/lib',
    '/project/projectdirs/openmsi/jupyterhub_libs/lib',
]
ld_entries = [entry for entry in curr_ld_lib_path.split(':') if entry]
for lib_dir in openmsi_lib_dirs:
    # Skip directories that are already listed so re-running this cell does
    # not keep growing the variable.
    if lib_dir not in ld_entries:
        ld_entries.append(lib_dir)
os.environ['LD_LIBRARY_PATH'] = ':'.join(ld_entries)

# sys.path.remove('/anaconda/lib/python2.7/site-packages')
# The OpenMSI site-packages go last on the module search path; the pactolus
# checkout goes first so it wins over any other copy.
sys.path.append('/global/project/projectdirs/openmsi/jupyterhub_libs/anaconda/lib/python2.7/site-packages')
sys.path.insert(0,'/project/projectdirs/openmsi/projects/meta-iq/pactolus/pactolus' )

from rdkit import Chem
# from rdkit.Chem.rdMolDescriptors import ExactMolWt
from rdkit.Chem import Descriptors
from rdkit.Chem import rdMolDescriptors
from rdkit.Chem import AllChem
from rdkit.Chem.Fingerprints import FingerprintMols
from rdkit.Chem import Draw
from rdkit import DataStructs
from rdkit.DataManip import Metric

import networkx as nx

In [8]:
# Fetch the stored 'lcmsrun' objects whose description matches '%20150826%'.
# NOTE(review): the '%' characters are presumably SQL-LIKE-style wildcards --
# confirm against metob.retrieve.
myFiles = metob.retrieve('lcmsrun', description='%20150826%')

# List the HDF5 file behind each run, one path per line.
for lcms_run in myFiles:
    print(lcms_run.hdf5_file)


/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_NEG_MSMS_ACN__Root_exu_cassette_2__R6_8ul.h5
/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_NEG_MSMS_ACN__Root_exu_cassette_10__R14_8ul.h5
/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_FPS_MSMS_ACN__ITSD_MidCassettes__8ul.h5
/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_NEG_MSMS_ACN__Root_exu_cassette_8__R12_8ul.h5
/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_NEG_MSMS_ACN__Root_exudate_1_3x_R1_8ul.h5
/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_POS_MSMS_ACN__Root_exu_cassette_3__R7_8ul.h5
/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_POS_MSMS_ACN__MeOHBlank_Pre_3__8ul_150826162701.h5
/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_POS_MSMS_ACN__Root_exu_cassette_8__R12_8ul.h5
/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_POS_MSMS_ACN__Root_exu_cassette_1__R5_8ul.h5
/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_POS_MSMS_ACN__Root_exudate_2_3x_R2_8ul.h5
/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_POS_MSMS_ACN__MeOHBlank_Post_2__8ul.h5
/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_POS_MSMS_ACN__MeOHBlank_Pre_6__8ul.h5
/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_POS_MSMS_ACN__Root_exu_cassette_5__R9_8ul.h5
/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_NEG_MSMS_ACN__Root_exu_cassette_5__R9_8ul.h5
/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_POS_MSMS_ACN__Root_exu_cassette_4__R8_8ul_150827152957.h5
/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_NEG_MSMS_ACN__Root_exu_cassette_9__R13_8ul.h5
/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_FPS_MSMS_ACN__ITSD_Pre_4__8ul.h5
/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_NEG_MSMS_ACN__Root_exu_cassette_1__R5_8ul.h5
/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_POS_MSMS_ACN__Root_exu_cassette_7__R11_8ul.h5
/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_POS_MSMS_ACN__Root_exudate_3_3x_R3_8ul.h5
/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_NEG_MSMS_ACN__Root_exu_cassette_4__R8_8ul.h5
/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_POS_MSMS_ACN__MeOHBlank_Pre_3__8ul.h5
/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_NEG_MSMS_ACN__Root_exudate_4_3x_R4_8ul.h5
/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_POS_MSMS_ACN__Root_exu_cassette_9__R13_8ul.h5
/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_POS_MSMS_ACN__Root_exudate_1_3x_R1_8ul.h5
/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_POS_MSMS_ACN__MeOHBlank_Post_3__8ul.h5
/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_POS_MSMS_ACN__MeOHBlank_PostCrash1__8ul.h5
/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_POS_MSMS_ACN__Root_exu_cassette_2__R6_8ul.h5
/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_FPS_MSMS_ACN__QCMix_Pre_8ul.h5
/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_POS_MSMS_ACN__MeOHBlank_Post_1__8ul.h5
/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_POS_MSMS_ACN__Root_exudate_4_3x_R4_8ul.h5
/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_NEG_MSMS_ACN__Root_exu_cassette_3__R7_8ul.h5
/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_POS_MSMS_ACN__Root_exu_cassette_10__R14_8ul.h5
/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_NEG_MSMS_ACN__Root_exu_cassette_7__R11_8ul.h5
/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_NEG_MSMS_ACN__Root_exu_cassette_6__R10_8ul.h5
/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_POS_MSMS_ACN__Root_exu_cassette_6__R10_8ul.h5
/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_POS_MSMS_ACN__MeOHBlank_PostCrash2__8ul.h5
/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_NEG_MSMS_ACN__Root_exu_cassette_3__R7_8ul_redo.h5
/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_POS_MSMS_ACN__MeOHBlank_Pre_5__8ul.h5
/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_POS_MSMS_ACN__MeOHBlank_Post_4__8ul.h5
/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_FPS_MSMS_ACN__ITSD_POST_4__8ul.h5
/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_POS_MSMS_ACN__MeOHBlank_Pre_2__8ul.h5
/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_POS_MSMS_ACN__MeOHBlank_Pre_1__8ul.h5
/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_FPS_MSMS_ACN__QCMix_POST_8ul.h5
/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_NEG_MSMS_ACN__Root_exudate_2_3x_R2_8ul.h5
/global/project/projectdirs/metatlas//raw_data/katezh/20150826_KZH_pHILIC_QExactive_ExudatesAndCassettes/20150826_pHILIC_NEG_MSMS_ACN__Root_exudate_3_3x_R3_8ul.h5

In [13]:
def _sample_label(run_name):
    """Return the sample-group label for a run's base file name (no extension).

    The rules are checked in order and the first match wins:
    'Blank', 'QC', 'ITSD' (labelled 'ISTD'), '_exudate_' (labelled
    'Root Exudate'); anything else is labelled with the text between the
    first and second '__' of the name.

    Raises IndexError if none of the keywords match and the name contains
    no '__' separator.
    """
    if 'Blank' in run_name:
        return 'Blank'
    if 'QC' in run_name:
        return 'QC'
    if 'ITSD' in run_name:
        # The file names spell it "ITSD"; the group label is "ISTD".
        return 'ISTD'
    if '_exudate_' in run_name:
        return 'Root Exudate'
    return run_name.split('__')[1]


polarity = []
sample = []

for lcms_run in myFiles:
    run_name = os.path.basename(lcms_run.hdf5_file)
    # Strip only the trailing extension; a blanket replace('.h5', '') would
    # also eat any '.h5' that happens to appear elsewhere in the path.
    if run_name.endswith('.h5'):
        run_name = run_name[:-len('.h5')]
    # The third '_'-separated field of the file name is recorded as polarity.
    polarity.append(run_name.split('_')[2])
    sample.append(_sample_label(run_name))

# Sorted, de-duplicated labels: one entry per group to create.
ugroups = np.unique(sample)
print(ugroups)


[u'Blank' u'ISTD' u'QC' u'Root Exudate' u'Root_exu_cassette_1'
 u'Root_exu_cassette_10' u'Root_exu_cassette_2' u'Root_exu_cassette_3'
 u'Root_exu_cassette_4' u'Root_exu_cassette_5' u'Root_exu_cassette_6'
 u'Root_exu_cassette_7' u'Root_exu_cassette_8' u'Root_exu_cassette_9']

In [14]:
# Build one metob.Group per unique sample label and fill it with the runs
# carrying that label. `sample` is parallel to `myFiles`, so pairing the two
# with zip selects the matching runs.
all_my_groups = []
for label in ugroups:
    sample_group = metob.Group()
    sample_group.name = label
    sample_group.description = label
    sample_group.items = [
        lcms_run
        for lcms_run, run_label in zip(myFiles, sample)
        if run_label == label
    ]
    all_my_groups.append(sample_group)

# The "experiment" group (the group of groups) still has to be created
# separately from these per-sample groups.

# Store every per-sample group in one call.
metob.store(all_my_groups)

In [28]:
# Show how many group objects all_my_groups currently holds.
# NOTE(review): the saved output for this cell is 22, but only 14 unique
# labels were printed for ugroups, and this cell's execution count (28) is
# out of sequence with its neighbours. Presumably all_my_groups came from a
# different kernel state -- re-run the notebook from a fresh kernel to confirm.
len(all_my_groups)


Out[28]:
22

In [15]:
# Top-level "experiment" group: its items are the per-sample groups held in
# all_my_groups, making it a group of groups.
experiment_name = '20150826_KZH_QExactive_Hilic_Avena_exudates_and_standards'
experiment_description = 'These samples are hydroponic Avena exudates collected over three, six, nine and 12 weeks in four biological replicates and two experimental controls. The aim is to define exudation profile of Avena barbata.'

experiment_group = metob.Group()
experiment_group.name = experiment_name
experiment_group.description = experiment_description
experiment_group.items = all_my_groups
metob.store(experiment_group)

In [16]:
%%javascript
var nb = IPython.notebook;
var kernel = IPython.notebook.kernel;
var command = "NOTEBOOK_FULL_PATH = '" + nb.base_url + nb.notebook_path + "'";
kernel.execute(command);



In [17]:
# Publish a copy of this notebook to the OpenMSI web directory and print the
# nbviewer link for it.
# The copy and chmod are done in Python rather than through `%system`, so a
# file name with spaces or shell metacharacters is handled literally and a
# failed copy raises instead of being silently ignored.
import shutil

publish_dir = '/project/projectdirs/openmsi/www'
filename = os.path.basename(NOTEBOOK_FULL_PATH)
temp = '%s/%s'%(publish_dir, filename)

# `filename` is a bare base name, so the source is resolved against the
# kernel's current working directory.
# NOTE(review): assumes the kernel was started in the notebook's own directory.
shutil.copy(filename, temp)
os.chmod(temp, 0o775)  # rwxrwxr-x, same mode as the former `chmod 775`
print('http://nbviewer.ipython.org/url/portal.nersc.gov/project/openmsi/%s?flush_cache=true' % filename)


http://nbviewer.ipython.org/url/portal.nersc.gov/project/openmsi/MetAtlas_001_Group_Files_Kate_Exudates.ipynb?flush_cache=true

In [ ]: