notebook.community

Edit and run



In [ ]:

    
# 1) Convert files for Metatlas 2.0 (using mzCAT)
# 2) Transfer to NERSC (using mzCAT)
# 3) Make MetAtlas Groups for your files
# 4) Enter your internal standards into an Atlas
# 5) Extract data from each of your files for the internal standards
# 6) Export the results in a meaningful way

# If all goes according to plan, that means on Thursday we will:
# 1) Build a small Atlas of your favorite molecules that have been identified.
# 2) Extract data from each of your files for these molecules
# 3) Export the results in a meaningful way



In [1]:

    
%matplotlib inline
%config InlineBackend.figure_format = 'retina'

import sys,os,glob,csv
import numpy as np
from matplotlib import pylab as plt

from scipy.spatial.distance import pdist
from scipy.cluster.hierarchy import linkage
from scipy.cluster.hierarchy import fcluster
from scipy.spatial.distance import squareform

# Put the shared metatlas Python 2.7 site-packages directory at the front of
# sys.path so the metatlas imports below are looked up there first.
sys.path.insert(0,'/global/project/projectdirs/metatlas/anaconda/lib/python2.7/site-packages' )
from metatlas import metatlas_objects as metob
from metatlas import h5_query

# Extend (rather than overwrite) the dynamic-library search path with the
# shared OpenMSI library directories.
#
# Start from whatever LD_LIBRARY_PATH the kernel inherited. Empty components
# are dropped on purpose: an empty entry (such as the leading ':' produced by
# prepending an empty string) makes the dynamic loader search the current
# working directory.
# NOTE(review): the loader normally reads LD_LIBRARY_PATH at process start, so
# this assignment presumably only affects child processes -- confirm that the
# rdkit / pactolus imports actually depend on it.
curr_ld_lib_path = os.environ.get('LD_LIBRARY_PATH', '')
ld_lib_dirs = [entry for entry in curr_ld_lib_path.split(':') if entry]
for openmsi_lib_dir in ('/project/projectdirs/openmsi/jupyterhub_libs/boost_1_55_0/lib',
                        '/project/projectdirs/openmsi/jupyterhub_libs/lib'):
    # Skip directories that are already listed so re-running this cell does
    # not keep growing the variable.
    if openmsi_lib_dir not in ld_lib_dirs:
        ld_lib_dirs.append(openmsi_lib_dir)
os.environ['LD_LIBRARY_PATH'] = ':'.join(ld_lib_dirs)

# sys.path.remove('/anaconda/lib/python2.7/site-packages')
# The OpenMSI site-packages directory goes at the END of sys.path (lowest
# priority); the pactolus source directory goes at the FRONT (highest priority).
sys.path.append('/global/project/projectdirs/openmsi/jupyterhub_libs/anaconda/lib/python2.7/site-packages')
sys.path.insert(0,'/project/projectdirs/openmsi/projects/meta-iq/pactolus/pactolus' )

from rdkit import Chem
# from rdkit.Chem.rdMolDescriptors import ExactMolWt
from rdkit.Chem import Descriptors
from rdkit.Chem import rdMolDescriptors
from rdkit.Chem import AllChem
from rdkit.Chem.Fingerprints import FingerprintMols
from rdkit.Chem import Draw
from rdkit import DataStructs
from rdkit.DataManip import Metric

import networkx as nx



In [11]:

    
# Fetch the 'lcmsrun' records whose description matches the experiment tag,
# show the result list, then list the HDF5 file behind each run.
# (presumably '%' acts as a wildcard in metob.retrieve -- TODO confirm)
# NOTE(review): the saved output of this cell is an empty list, yet later
# cells show parsed results; verify this pattern is the one actually used.
myFiles = metob.retrieve('lcmsrun', description='%20150305_angelo_MSMS%')
# Single-argument print(...) behaves the same as the print statement.
print(myFiles)
for lcms_run in myFiles:
    print(lcms_run.hdf5_file)

[]



In [26]:

    
# Derive a polarity tag and a sample label for every run from the
# underscore-separated fields of its HDF5 file name.
polarity = []
sample = []

for lcms_run in myFiles:
    # File name without its directory, with every '.h5' occurrence removed.
    run_name = os.path.basename(lcms_run.hdf5_file).replace('.h5', '')
    fields = run_name.split('_')
    polarity.append(fields[3])  # the 4th field is recorded as the polarity
    if 'blank' in run_name:
        label = 'Blank'
    elif 'QC' in run_name:
        label = 'QC'
    else:
        # The last character of the 5th field is dropped before joining with
        # the 6th (presumably a replicate suffix -- TODO confirm).
        label = '%s_%s' % (fields[4][:-1], fields[5])
    sample.append(label)

# Sorted unique labels; one group per label is built from these later.
ugroups = np.unique(sample)
print(ugroups)









    



[u'10_18h' u'11_18h' u'12_18h' u'13_42h' u'14_42h' u'15_42h' u'16_42h'
 u'17_49h' u'18_49h' u'19_49h' u'1_3min' u'20_49h' u'2_3min' u'3_3min'
 u'4_3min' u'5_9h' u'6_9h' u'7_9h' u'8_9h' u'9_18h' u'Blank' u'QC']



In [29]:

    
# Build one metob.Group per unique sample label, fill it with the runs that
# carry that label, and store all of the groups in a single call.
# NOTE(review): the execution counts around this cell are out of order;
# re-running it presumably stores the groups again -- confirm before re-executing.
all_my_groups = []
for label in ugroups:
    sample_group = metob.Group()
    sample_group.name = label
    sample_group.description = label
    # `sample` is positionally aligned with `myFiles`, so matching positions
    # select this label's runs in their original order.
    sample_group.items = [myFiles[pos] for pos, run_label in enumerate(sample)
                          if run_label == label]
    all_my_groups.append(sample_group)

# don't forget to make the "experiment" group: the group of groups

metob.store(all_my_groups)



In [28]:

    
# Sanity check: one group is expected per unique sample label.
len(all_my_groups)









    Out[28]:





22



In [30]:

    
# Create the top-level "experiment" group: a group whose items are the
# per-sample groups collected in all_my_groups, then store it via metob.
# NOTE(review): the description string reads awkwardly ("wetup, timeseries");
# it is runtime data, so it is left untouched here -- consider rewording it.
myGroup = metob.Group()
myGroup.name = '20151002_TLS_QExactive_Hilic_Crust Porewater'
myGroup.description = 'This crust wetup, timeseries is paired with metatranscriptomics to identify who is doing what during the course of a wetup.'
myGroup.items = all_my_groups
metob.store(myGroup)



In [31]:

    
%%javascript
var nb = IPython.notebook;
var kernel = IPython.notebook.kernel;
var command = "NOTEBOOK_FULL_PATH = '" + nb.base_url + nb.notebook_path + "'";
kernel.execute(command);



In [32]:

    
# Publish a copy of this notebook to the OpenMSI web directory and print the
# nbviewer URL that renders it.
#
# Requires NOTEBOOK_FULL_PATH, which the %%javascript cell sets in the kernel.
# The copy and chmod use the Python standard library instead of shell
# commands, so file names with spaces or shell metacharacters are handled
# correctly and a failed copy raises instead of passing silently.
import shutil

filename = os.path.basename(NOTEBOOK_FULL_PATH)
temp = os.path.join('/project/projectdirs/openmsi/www', filename)
# The source is resolved relative to the kernel's working directory, exactly
# as the shell `cp` was.
shutil.copy(filename, temp)
# rwxrwxr-x: same mode as `chmod 775`.
os.chmod(temp, 0o775)
print('http://nbviewer.ipython.org/url/portal.nersc.gov/project/openmsi/%s?flush_cache=true'%filename)









    



http://nbviewer.ipython.org/url/portal.nersc.gov/project/openmsi/MetAtlas_001_Group_Files.ipynb?flush_cache=true



In [ ]: