In [ ]:
# 1) Convert files for MetAtlas 2.0 (using mzCAT)
# 2) Transfer to NERSC (using mzCAT)
# 3) Make MetAtlas Groups for your files
# 4) Enter your internal standards into an Atlas
# 5) Extract data from each of your files for the internal standards
# 6) Export the results in a meaningful way

# If all goes according to plan, that means on Thursday we will:
# 1) Build a small Atlas of your favorite molecules that have been identified.
# 2) Extract data from each of your files for these molecules
# 3) Export the results in a meaningful way

In [1]:
%matplotlib notebook
%config InlineBackend.figure_format = 'retina'

import sys,os,glob,csv
import numpy as np
from matplotlib import pylab as plt

from scipy.spatial.distance import pdist
from scipy.cluster.hierarchy import linkage
from scipy.cluster.hierarchy import fcluster
from scipy.spatial.distance import squareform

# Put the project's anaconda site-packages directory at the front of the import
# search path so the `metatlas` imports below are looked up there first.
sys.path.insert(0,'/global/project/projectdirs/metatlas/anaconda/lib/python2.7/site-packages' )
from metatlas import metatlas_objects as metob
from metatlas import h5_query

# curr_ld_lib_path = ''
# os.environ['LD_LIBRARY_PATH'] = curr_ld_lib_path + ':/project/projectdirs/openmsi/jupyterhub_libs/boost_1_55_0/lib' + ':/project/projectdirs/openmsi/jupyterhub_libs/lib'
# sys.path.remove('/anaconda/lib/python2.7/site-packages')
# sys.path.append('/global/project/projectdirs/openmsi/jupyterhub_libs/anaconda/lib/python2.7/site-packages')
# sys.path.insert(0,'/project/projectdirs/openmsi/projects/meta-iq/pactolus/pactolus' )

# from rdkit import Chem
# from rdkit.Chem.rdMolDescriptors import ExactMolWt
# from rdkit.Chem import Descriptors
# from rdkit.Chem import rdMolDescriptors
# from rdkit.Chem import AllChem
# from rdkit.Chem.Fingerprints import FingerprintMols
# from rdkit.Chem import Draw
# from rdkit import DataStructs
# from rdkit.DataManip import Metric

# import networkx as nx

In [ ]:
# Exploratory lookup: fetch stored Atlas objects by name, take the first hit,
# and display the references on its first compound identification.
# NOTE(review): the trailing '%' in the name looks like a wildcard -- confirm
# against metob.retrieve.
atlases = metob.retrieve(
    'Atlas',
    name='QExactive%',
)
myid = atlases[0]
cid = myid.compound_identifications[0]

# Other inspection calls tried while exploring (left disabled):
#   for c in myid.compound_identifications: print c
#   type(cid.references[0].retrieve())
#   cid.references[0].retrieve()
#   metob.edit_objects(cid.references)

# Last expression of the cell, so the notebook displays it.
cid.references


:0: FutureWarning: IPython widgets are experimental and may change in the future.
/global/project/projectdirs/metatlas/anaconda/lib/python2.7/site-packages/pandas/core/internals.py:956: FutureWarning: comparison to `None` will result in an elementwise object comparison in the future.
  return self._try_coerce_result(func(values, other))

In [36]:
# [{'InChI': u'InChI=1S/C7H6O3/c8-6-4-2-1-3-5(6)7(9)10/h1-4,8H,(H,9,10)/p-1',
#   'MonoIsotopic_molecular_weight': 138.032,
#   'creation_time': '2015-10-08T17:37:14',
#   'description': u'',
#   'formula': u'C7H6O3',
#   'functional_sets': [],
#   'last_modified': '2015-10-08T17:38:12',
#   'name': u'salicylate',
#   'prev_uid': u'',
#   'reference_xrefs': [],
#   'synonyms': u'',
#   'unique_id': u'75423646be08414a8a6337ca6a7120ce',
#   'url': u'',
#   'username': u'bpb'}]

# # Query for your compound; use any of the fields shown above (e.g. name or unique_id)
# query_comp = metob.retrieve('Compound',unique_id='75423646be08414a8a6337ca6a7120ce')
# for i,c in enumerate(query_comp):
#     print i,c.name, c.formula,c.InChI,c.synonyms
# query_comp

In [87]:
# These are the compounds to add to the database.  
# In reality, a person wouldn't have this nice list, but would need to search by keywords or other filters.  QGrid might be really handy.
compounds = [
    '3,6-Dihydroxy-4-methylpyridazine',
'd5-benzoic acid',
'4-(3,3-dimethyl-ureido)benzoic acid',
'9-anthracene carboxylic acid',
'13C-15N-L-phenylalanine',
'13C-glucose',
'd8-lysine',
'd4 lysine',
'ABMBA'
]
myCompounds = []
for mycompound in compounds:
    query_comp = metob.retrieve('Compound',name=mycompound)
    for c in query_comp:
        myCompounds.append(c)
        print c.name,c.formula,c.MonoIsotopic_molecular_weight,c.description, c.InChI  
        print " "


3,6-Dihydroxy-4-methylpyridazine C5H6N2O2 126.043 https://pubchem.ncbi.nlm.nih.gov/compound/79826 InChI=1S/C5H6N2O2/c1-3-2-4(8)6-7-5(3)9/h2H,1H3,(H,6,8)(H,7,9)
 
d5-benzoic acid C7H6O2 127.068 https://pubchem.ncbi.nlm.nih.gov/compound/71603 InChI=1S/C7H6O2/c8-7(9)6-4-2-1-3-5-6/h1-5H,(H,8,9)/i1D,2D,3D,4D,5D
 
4-(3,3-dimethyl-ureido)benzoic acid C10H12N2O3 208.085 http://www.chemicalize.org/structure/#!mol=CN%28C%29C%28%3DO%29Nc1ccc%28cc1%29C%28%3DO%29O&source=fp InChI=1S/C10H12N2O3/c1-12(2)10(15)11-8-5-3-7(4-6-8)9(13)14/h3-6H,1-2H3,(H,11,15)(H,13,14)
 
9-anthracene carboxylic acid C15H10O2 222.068 https://pubchem.ncbi.nlm.nih.gov/compound/9-Anthroic_acid InChI=1S/C15H10O2/c16-15(17)14-12-7-3-1-5-10(12)9-11-6-2-4-8-13(11)14/h1-9H,(H,16,17)
 
13C-15N-L-phenylalanine C9H11NO2 175.106 https://pubchem.ncbi.nlm.nih.gov/compound/16217565 InChI=1S/C9H11NO2/c10-8(9(11)12)6-7-4-2-1-3-5-7/h1-5,8H,6,10H2,(H,11,12)/t8-/m0/s1/i1+1,2+1,3+1,4+1,5+1,6+1,7+1,8+1,9+1,10+1
 
13C-glucose C6H12O6 186.084 https://pubchem.ncbi.nlm.nih.gov/compound/10954241#section=Top InChI=1S/C6H12O6/c7-1-2-3(8)4(9)5(10)6(11)12-2/h2-11H,1H2/t2-,3-,4+,5-,6?/m1/s1/i1+1,2+1,3+1,4+1,5+1,6+1
 
d8-lysine C6H14N2O2 154.156 http://www.chemicalize.org/structure/#!mol=%5B2H%5DC%28%5B2H%5D%29%28C%28C%28%3DO%29O%29N%29C%28%5B2H%5D%29%28%5B2H%5D%29C%28%5B2H%5D%29%28%5B2H%5D%29C%28%5B2H%5D%29%28%5B2H%5D%29N&source=fp InChI=1/C6H14N2O2/c7-4-2-1-3-5(8)6(9)10/h5H,1-4,7-8H2,(H,9,10)/i1D2,2D2,3D2,4D2
 
d4 lysine C6H14N2O2 150.131 http://www.chemicalize.org/structure/#!mol=%5B2H%5DC%28%5B2H%5D%29%28CC%28C%28%3DO%29O%29N%29C%28%5B2H%5D%29%28%5B2H%5D%29CN&source=fp InChI=1/C6H14N2O2/c7-4-2-1-3-5(8)6(9)10/h5H,1-4,7-8H2,(H,9,10)/i1D2,2D2
 
ABMBA C8H8BrNO2 228.974 https://pubchem.ncbi.nlm.nih.gov/compound/2774400 InChI=1S/C8H8BrNO2/c1-4-2-5(8(11)12)7(10)6(9)3-4/h2-3H,10H2,1H3,(H,11,12)
 

In [44]:
# Instantiate an RtReference with no arguments so that its repr (the cell
# output) lists the available fields and their default values.
metob.RtReference()


Out[44]:
{'RTUnits': 'sec',
 'RTmax': 0.0,
 'RTmin': 0.0,
 'RTpeak': 0.0,
 'creation_time': '2015-10-15T21:34:39',
 'description': u'No description',
 'enabled': True,
 'last_modified': '2015-10-15T21:34:39',
 'lcms_run': None,
 'name': u'Untitled',
 'prev_uid': u'',
 'ref_type': u'',
 'unique_id': u'0f844fb9664144349dcea290c1579204',
 'username': u'bpb'}

In [88]:
# Build an Atlas with one CompoundIdentification per entry of `myCompounds`
# (populated by the compound-lookup cell), each carrying a positive-mode
# [M+H]+ m/z reference, and store the Atlas with metob.store.

# Mass added to the monoisotopic molecular weight to get the [M+H]+ m/z.
PROTON_MASS = 1.007276

# Refuse to store an Atlas that has no identifications: this happens when the
# cell is run on a fresh kernel state where the lookup produced nothing.
if not myCompounds:
    raise ValueError('myCompounds is empty; run the compound lookup cell before building the Atlas')

# Retention times for the disabled RtReference block below.
# NOTE(review): only two values are listed, so rt[i] would raise IndexError
# from the third compound onward -- extend to one entry per compound before
# re-enabling the RtReference block.
rt = [3.0,19.97]
all_identifications = []
for i,c in enumerate(myCompounds):
    mzRef = metob.MzReference()
    mzRef.mz = c.MonoIsotopic_molecular_weight + PROTON_MASS
    mzRef.mz_tolerance = 20
    mzRef.mz_tolerance_units = 'ppm'
    mzRef.detected_polarity = 'positive'
    mzRef.adduct = '[M+H]+'

#     rtRef = metob.RtReference()
#     rtRef.RTUnits = 'min'
#     rtRef.RTmin = rt[i] - 1.5
#     rtRef.RTmax = rt[i] + 1.5
#     rtRef.RTpeak = rt[i]

    myID = metob.CompoundIdentification()
    myID.compound = c
    myID.references = [mzRef]#,rtRef]
    all_identifications.append(myID)

myAtlas = metob.Atlas()
# metob.Atlas() has "compound_identifications" and a "name"
# NOTE(review): 'Encompasing' is misspelled; left unchanged because the stored
# name may be used to retrieve this Atlas elsewhere -- confirm before renaming.
myAtlas.name = 'All Encompasing Internal Standards Positive Mode'
myAtlas.compound_identifications = all_identifications
metob.store(myAtlas)

In [34]:
%%javascript
// Define NOTEBOOK_FULL_PATH in the Python kernel as this notebook's base URL
// followed by its notebook path, by executing an assignment statement there.
var notebook = IPython.notebook;
var fullPath = notebook.base_url + notebook.notebook_path;
var assignment = "NOTEBOOK_FULL_PATH = '" + fullPath + "'";
notebook.kernel.execute(assignment);



In [35]:
import shutil

# Publish a copy of this notebook to the web-served directory and print the
# nbviewer URL for it.  NOTEBOOK_FULL_PATH is set in the kernel by the
# %%javascript cell, which must have run first.
#
# The copy is done in Python rather than through `%system cp $filename ...`:
# an unquoted shell interpolation breaks on file names containing spaces or
# shell metacharacters, and a failed `cp` was ignored while the URL was still
# printed.  shutil.copy / os.chmod raise on failure instead.
WWW_DIR = '/project/projectdirs/openmsi/www'

filename = os.path.basename(NOTEBOOK_FULL_PATH)
temp = os.path.join(WWW_DIR, filename)

# The source is given by bare file name, so this assumes the kernel's working
# directory is the directory that contains the notebook.
shutil.copy(filename, temp)
os.chmod(temp, 0o775)  # same mode as the former `chmod 775`

print('http://nbviewer.ipython.org/url/portal.nersc.gov/project/openmsi/%s?flush_cache=true' % filename)


http://nbviewer.ipython.org/url/portal.nersc.gov/project/openmsi/MetAtlas_002_Make_Atlas.ipynb?flush_cache=true

In [ ]: