MetAtlas Workflow Tool

  1. Acquire great data and upload it to this folder: /project/projectdirs/metatlas/data_for_metatlas_2/
  2. Create an experiment. This requires that you specify a directory with your mzML files in it. Convert the files and register them in the experiment.
  3. Create a file specification sheet for your experiment, populate it, and apply the file specification to the experiment.
  4. Create an Atlas in a spreadsheet, then import the spreadsheet as an Atlas in the experiment.

In [1]:
%matplotlib inline
from matplotlib import pylab as plt

from metatlas import metatlas_objects
from metatlas import h5_query
# from metatlas.metatlas_objects import Atlas
# from metatlas.metatlas_objects import FileInfo

import glob, os

In [2]:
# Show every name in the metatlas_objects module namespace to see which
# classes and helper functions are available.
# Alternative (disabled): metatlas_objects.list_experiments()
dir(metatlas_objects)


Out[2]:
['Atlas',
 'Bool',
 'CFloat',
 'CInt',
 'CUnicode',
 'Compound',
 'Experiment',
 'FileInfo',
 'FileSpec',
 'HasTraits',
 'Instance',
 'List',
 'NERSC_WORKSPACE',
 '_MetatlasObject',
 '_WORKSPACE',
 '_Workspace',
 '__builtins__',
 '__doc__',
 '__file__',
 '__name__',
 '__package__',
 'dataset',
 'get_experiment',
 'list_experiments',
 'mzml_to_hdf',
 'os',
 'pd',
 'pickle',
 'random']

In [14]:
# Start a new experiment object; files are registered with it in a later cell.
experiment_name = 'temporary_experiment_20150807'
myExperiment = metatlas_objects.Experiment(name=experiment_name)
# edit() raises TypeError('Please load files first') while the experiment has
# no file infos, so the call stays disabled until files have been loaded.
# myExperiment.edit()


---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-14-a6a2a31932fb> in <module>()
      1 myExperiment = metatlas_objects.Experiment(name = 'temporary_experiment_20150807')
----> 2 myExperiment.edit()

/project/projectdirs/metatlas/python_pkgs/metatlas/metatlas_objects.pyc in edit(self)
    220         # create a visualization for the dataframe
    221         if not self.finfos:
--> 222             raise TypeError('Please load files first')
    223         data = [f._trait_values for f in self.finfos]
    224         dataframe = pd.DataFrame(data)

TypeError: Please load files first

In [ ]:
# Convert every mzML file in the experiment directory to HDF5 and register each
# one with the experiment using an empty (default) FileSpec.
# This takes a while for a big experiment.
myPath = '/project/projectdirs/metatlas/data_for_metatlas_2/20150504_LPSilva_Actino_HILIC_POS_51isolates'
# glob returns matches in arbitrary, filesystem-dependent order; sort them so
# the files are loaded in the same order on every run (later cells index
# myExperiment.finfos by position).
myFiles = sorted(glob.glob(os.path.join(myPath, '*.mzML')))
for f in myFiles:
    # A fresh FileSpec per file, so no spec object is shared between files.
    fs = metatlas_objects.FileSpec()
    myExperiment.load_files([f], fs)
# Persist the experiment once, after all files have been registered.
myExperiment.save()

In [19]:
# for f in myFiles:
#     myPath = os.path.dirname(f)
#     myFile = os.path.basename(f)
#     fullPath = os.path.join(myPath,myFile)

In [23]:
# Print, one per line, the metadata fields stored on the first registered file.
first_finfo = myExperiment.finfos[0]
finfo_fields = ('hdf_file', 'mzml_file', 'name', 'polarity', 'group',
                'inclusion_order', 'normalization_factor',
                'retention_correction')
for field in finfo_fields:
    # Single-argument print(...) behaves the same as the print statement.
    print(getattr(first_finfo, field))

# myExperiment.edit()


/project/projectdirs/metatlas/data_for_metatlas_2/20150504_LPSilva_Actino_HILIC_POS_51isolates/20150424_pHILIC_POS_MSMS_D1B20A-1.h5
/project/projectdirs/metatlas/data_for_metatlas_2/20150504_LPSilva_Actino_HILIC_POS_51isolates/20150424_pHILIC_POS_MSMS_D1B20A-1.mzML

In [3]:
import pandas as pd

# Tab-separated input tables: the compound atlas and the per-file metadata.
atlas_file = '/global/homes/b/bpb/20150804_POS_pHILIC_R2A_Small.txt'
finfo_file = '/global/homes/b/bpb/POS_C18_Actino_FileInfo.txt'

# Read the atlas, replace missing cells with '', and keep one dict per row.
df = pd.read_csv(atlas_file, sep='\t').fillna('')
comp_list = df.to_dict('records')

# Same treatment for the file-info table; note that df is rebound to this
# second table from here on.
df = pd.read_csv(finfo_file, sep='\t').fillna('')
finfo_list = df.to_dict('records')

# Peek at the first compound record to check the column names.
print(comp_list[0])
# print finfo_list


{'adducts': 'H+', 'name': 'tyramine', 'rt_max': 2.63, 'mz_threshold': 15, 'pubchem_id': '', 'rt_min': 1.63, 'formula': '', 'rt_peak': 2.13, 'neutral_mass': 137.0842, 'mz': 138.0912}

In [ ]:
# Build a FileSpec for one file: polarity 1, a group name and an inclusion order.
# NOTE(review): uGroupName, j and i are not defined in any visible cell, so this
# expression cannot run as-is — confirm where they are meant to come from.
metatlas_objects.FileSpec(
    polarity=1,
    group=uGroupName[j],
    inclusion_order=i,
)

In [4]:
# Columns of each atlas record that are passed to Compound as keyword arguments.
compound_fields = ('name', 'formula', 'adducts', 'mz', 'mz_threshold',
                   'rt_min', 'rt_max', 'rt_peak', 'neutral_mass', 'pubchem_id')

# One Compound per record in comp_list, in the same order as the records.
compounds = [
    metatlas_objects.Compound(**{field: comp[field] for field in compound_fields})
    for comp in comp_list
]

In [5]:
# Bundle the compounds into a named Atlas and call its edit() method.
# Atlas is referenced through the metatlas_objects module: the direct
# `from metatlas.metatlas_objects import Atlas` import is commented out, so the
# bare name `Atlas` raises NameError on a fresh kernel.
a = metatlas_objects.Atlas(name='Positive Mode R2A Media Hilic',
                           compounds=compounds)
a.edit()


:0: FutureWarning: IPython widgets are experimental and may change in the future.
//anaconda/lib/python2.7/site-packages/pandas/core/internals.py:956: FutureWarning: comparison to `None` will result in an elementwise object comparison in the future.
  return self._try_coerce_result(func(values, other))

In [ ]: