In [ ]:
# 1) Convert files for Metatlas 2.0 (using mzCAT)
# 2) Transfer to NERSC (using mzCAT)
# 3) Make MetAtlas Groups for your files
# 4) Enter your internal standards into an Atlas
# 5) Extract data from each of your files for the internal standards
# 6) Export the results in a meaningful way
# If all goes according to plan, on Thursday we will:
# 1) Build a small Atlas of your favorite molecules that have been identified.
# 2) Extract data from each of your files for these molecules
# 3) Export the results in a meaningful way
In [1]:
%matplotlib inline
%config InlineBackend.figure_format = 'retina'
import sys,os,glob,csv
import numpy as np
from matplotlib import pylab as plt
from scipy.spatial.distance import pdist
from scipy.cluster.hierarchy import linkage
from scipy.cluster.hierarchy import fcluster
from scipy.spatial.distance import squareform
# Put the metatlas Anaconda site-packages at the front of sys.path so it is
# searched first when the metatlas modules are imported just below.
sys.path.insert(0,'/global/project/projectdirs/metatlas/anaconda/lib/python2.7/site-packages' )
from metatlas import metatlas_objects as metob
from metatlas import h5_query
# LD_LIBRARY_PATH is overwritten rather than extended: curr_ld_lib_path is the
# empty string, so any value already in the environment is discarded and the
# new value starts with ':'.
# NOTE(review): presumably these boost / jupyterhub_libs directories are meant
# for the rdkit imports further down; changing LD_LIBRARY_PATH inside an
# already-running process may only affect child processes -- confirm this has
# the intended effect.
curr_ld_lib_path = ''
os.environ['LD_LIBRARY_PATH'] = curr_ld_lib_path + ':/project/projectdirs/openmsi/jupyterhub_libs/boost_1_55_0/lib' + ':/project/projectdirs/openmsi/jupyterhub_libs/lib'
# sys.path.remove('/anaconda/lib/python2.7/site-packages')
# Appended at the end of sys.path, so it is only consulted after every entry
# already on the path.
sys.path.append('/global/project/projectdirs/openmsi/jupyterhub_libs/anaconda/lib/python2.7/site-packages')
# Inserted at the front of sys.path, so the pactolus directory now takes
# precedence over the metatlas site-packages entry added above.
sys.path.insert(0,'/project/projectdirs/openmsi/projects/meta-iq/pactolus/pactolus' )
from rdkit import Chem
# from rdkit.Chem.rdMolDescriptors import ExactMolWt
from rdkit.Chem import Descriptors
from rdkit.Chem import rdMolDescriptors
from rdkit.Chem import AllChem
from rdkit.Chem.Fingerprints import FingerprintMols
from rdkit.Chem import Draw
from rdkit import DataStructs
from rdkit.DataManip import Metric
import networkx as nx
In [11]:
# Fetch every 'lcmsrun' object whose description matches the experiment
# pattern, show the returned collection, then list each run's HDF5 path.
myFiles = metob.retrieve('lcmsrun', description='%20150305_angelo_MSMS%')
print(myFiles)
for run in myFiles:
    print(run.hdf5_file)
In [26]:
def _parse_run_name(hdf5_path):
    """Derive (polarity, sample label) from an LC-MS run's HDF5 file name.

    The base name, with a trailing '.h5' removed, is split on '_'.
    Token 3 is returned as the polarity. The sample label is:
      * 'Blank' when the name contains 'blank',
      * 'QC' when the name contains 'QC',
      * otherwise token 4 without its last character, joined by '_' to token 5.

    Raises:
        IndexError: when the name has too few '_'-separated tokens.
    """
    base_name = os.path.basename(hdf5_path)
    # Strip only the extension; a blanket replace('.h5', '') would also remove
    # any '.h5' that happens to occur in the middle of the name.
    if base_name.endswith('.h5'):
        base_name = base_name[:-len('.h5')]
    tokens = base_name.split('_')
    run_polarity = tokens[3]
    if 'blank' in base_name:
        label = 'Blank'
    elif 'QC' in base_name:
        label = 'QC'
    else:
        # NOTE(review): the dropped last character of token 4 looks like a
        # replicate suffix -- confirm against the file naming convention.
        label = '%s_%s'%(tokens[4][:-1],tokens[5])
    return run_polarity, label


# polarity[i] and sample[i] describe myFiles[i]; both lists are rebuilt from
# scratch so re-running this cell does not accumulate duplicates.
polarity = []
sample = []
for f in myFiles:
    run_polarity, label = _parse_run_name(f.hdf5_file)
    polarity.append(run_polarity)
    sample.append(label)

# Unique sample labels (sorted by np.unique); one group is built per label.
ugroups = np.unique(sample)
print(ugroups)
In [29]:
# Create one Group per unique sample label and fill it with the runs that
# carry that label; everything is written to the database in one store() call.
all_my_groups = []
for label in ugroups:
    group = metob.Group()
    group.name = label
    group.description = label
    # sample[i] is the label of myFiles[i], so pairing them selects this
    # group's runs in their original order.
    group.items = [run for run, run_label in zip(myFiles, sample) if run_label == label]
    all_my_groups.append(group)
# don't forget to make the "experiment" group: the group of groups
metob.store(all_my_groups)
In [28]:
# Display how many groups all_my_groups currently holds.
len(all_my_groups)
Out[28]:
In [30]:
# The "experiment" group is a group of groups: its items are the per-sample
# groups collected in all_my_groups.
# NOTE(review): this name/description mention a 20151002 crust wetup
# experiment, while the runs were retrieved with the pattern
# '%20150305_angelo_MSMS%' -- confirm they describe the same data set.
experiment_name = '20151002_TLS_QExactive_Hilic_Crust Porewater'
experiment_description = 'This crust wetup, timeseries is paired with metatranscriptomics to identify who is doing what during the course of a wetup.'

experiment_group = metob.Group()
experiment_group.name = experiment_name
experiment_group.description = experiment_description
experiment_group.items = all_my_groups
metob.store(experiment_group)
In [31]:
%%javascript
// Hand this notebook's server-side path to the Python kernel by executing an
// assignment to NOTEBOOK_FULL_PATH there.
var nb = IPython.notebook;
var kernel = nb.kernel;
// JSON.stringify produces a quoted, escaped string literal, so a path that
// contains quotes or backslashes can neither break nor inject into the
// Python statement (plain '...' concatenation could).
var command = "NOTEBOOK_FULL_PATH = " + JSON.stringify(nb.base_url + nb.notebook_path);
kernel.execute(command);
In [32]:
import shutil  # local import: only this publishing cell needs it

# Copy this notebook into the web-served directory, make it readable, and
# print the nbviewer URL for it.
# Pure-Python copy/chmod replace the `%system cp` / `%system chmod` calls:
# those interpolated the unquoted file name into a shell command (breaking on
# spaces or shell metacharacters) and did not surface failures.
WWW_DIR = '/project/projectdirs/openmsi/www'

# NOTEBOOK_FULL_PATH is set by the preceding %%javascript cell.
filename = os.path.basename(NOTEBOOK_FULL_PATH)
temp = '%s/%s'%(WWW_DIR,filename)

# The source is resolved relative to the kernel's working directory.
# NOTE(review): assumes that directory is the one containing the notebook.
shutil.copy(filename, temp)
os.chmod(temp, 0o775)
print('http://nbviewer.ipython.org/url/portal.nersc.gov/project/openmsi/%s?flush_cache=true'%filename)
In [ ]: