To install the kernel used by NERSC-metatlas users, copy the following text to $HOME/.ipython/kernels/mass_spec_cori/kernel.json
{
"argv": [
"/global/common/software/m2650/python-cori/bin/python",
"-m",
"IPython.kernel",
"-f",
"{connection_file}"
],
"env": {
"PATH": "/global/common/software/m2650/python-cori/bin:/usr/local/bin:/usr/bin:/bin:/usr/sbin:/sbin"
},
"display_name": "mass_spec_cori",
"language": "python"
}
In [ ]:
from IPython.core.display import display, HTML
display(HTML("<style>.container { width:100% !important; }</style>"))
%matplotlib notebook
import sys
import os
import multiprocessing as mp
#### add a path to your private code if not using production code ####
#sys.path.insert(0,"/global/homes/d/dgct/Repos/metatlas/") #where your private code is
######################################################################
from metatlas.helpers import dill2plots as dp
from metatlas.helpers import fastanalysis as fa
import metatlas.metatlas_objects as metob
from metatlas.helpers import metatlas_get_data_helper_fun as ma_data
# other data tools you might need
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
In [ ]:
# Retrieve every atlas named 'test_atlas_positive_mode' (username='*' is passed
# as given; presumably a wildcard over all users -- confirm against metob.retrieve).
atlases = metob.retrieve('Atlas', name='test_atlas_positive_mode', username='*')

# Print index, name and last-modified time for each hit so that one can be
# picked by index in the next cell. A single formatted string is used so the
# output is the same under Python 2 and Python 3.
for i, a in enumerate(atlases):
    print('%d %s %s' % (i, a.name, pd.to_datetime(a.last_modified, unit='s')))
    # optional extra column: len(a.compound_identifications)
In [ ]:
# Choose the atlas to work with; the index refers to the listing printed above.
atlas = atlases[0]
# Single-argument print(...) behaves identically on Python 2 and Python 3.
print(atlas.name)
print(atlas.username)
In [ ]:
# Select the file groups to analyse by name.
# include_list / exclude_list are passed empty here; the original cell kept
# ['QC','Blank'] as a commented-out example value (presumably for exclude_list).
groups = dp.select_groups_for_analysis(
    name='test_groups_pos',
    most_recent=True,
    remove_empty=True,
    include_list=[],
    exclude_list=[],
)
In [ ]:
# Folder that receives the results written by later cells; created on first use.
output_dir = '/global/homes/b/bpb/Downloads/test_pos'

if os.path.exists(output_dir):
    pass  # already present, nothing to create
else:
    os.makedirs(output_dir)

print('your results will be written to %s'%output_dir)
In [ ]:
# Build the atlas dataframe and label each row with its compound
# identification name.
atlas_df = ma_data.make_atlas_df(atlas)
atlas_df['label'] = [cid.name for cid in atlas.compound_identifications]

# One work item per (file, group) pair; every item also carries the atlas
# dataframe and the atlas object because the worker takes a single tuple.
all_files = [(my_file, my_group, atlas_df, atlas)
             for my_group in groups
             for my_file in my_group.items]

# mp.Pool(processes=0) fails with an unhelpful message, so report the real
# cause when the selected groups contain no files.
if not all_files:
    raise ValueError('no files found in the selected groups; cannot build metatlas_dataset')

# Use at most 4 worker processes, and never more than there are files.
pool = mp.Pool(processes=min(4, len(all_files)))
try:
    metatlas_dataset = pool.map(ma_data.get_data_for_atlas_df_and_file, all_files)
finally:
    # map() has either returned every result or raised, so the workers can be
    # stopped right away; join() then waits for them to exit so that no
    # worker processes are left behind, even on failure.
    pool.terminate()
    pool.join()

print('done making metatlas_dataset')
In [ ]:
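###
# Parameter meanings:
# each parameter is compared to the best-scoring metric for each compound
# across all files.
#
# Uncomment (and, for "Custom", fill in) exactly one of the kwargs blocks
# below before running the cell that calls fa.filter_and_output, which
# expects a variable named kwargs.
###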
# 'min_intensity' <= highest intensity across all files for given compound
# 'rt_tolerance' >= shift of median RT across all files for given compound to reference
# 'mz_tolerance' >= ppm of median mz across all files for given compound relative to reference
# 'min_msms_score' <= highest compound dot-product score across all files for given compound relative to reference
# 'min_num_frag_matches' <= number of matching mzs when calculating max_msms_score
# 'min_relative_frag_intensity' <= ratio of second highest to first highest intensity of matching sample mzs
###
# Custom
###
# kwargs = {'min_intensity': ,
# 'rt_tolerance': ,
# 'mz_tolerance': ,
# 'min_msms_score': ,
# 'allow_no_msms': ,
# 'min_num_frag_matches': ,
# 'min_relative_frag_intensity': }
###
# Loose
###
# kwargs = {'min_intensity': 1e3,
# 'rt_tolerance': .25,
# 'mz_tolerance': 25,
# 'min_msms_score': 0.3, 'allow_no_msms': True,
# 'min_num_frag_matches': 1, 'min_relative_frag_intensity': .01}
###
# Strict
###
# kwargs = {'min_intensity': 1e5,
# 'rt_tolerance': .25,
# 'mz_tolerance': 5,
# 'min_msms_score': .6, 'allow_no_msms': False,
# 'min_num_frag_matches': 3, 'min_relative_frag_intensity': .1}
In [ ]:
# Reload fastanalysis so that edits to the module are picked up without
# restarting the kernel.
fa = reload(fa)

# Run the filter using the thresholds in kwargs; the call returns four
# values, unpacked here as the pass/fail atlas dataframes and datasets.
(pass_atlas_df,
 fail_atlas_df,
 pass_dataset,
 fail_dataset) = fa.filter_and_output(
    atlas_df,
    metatlas_dataset,
    output_dir,
    output_pass=True,
    output_fail=True,
    **kwargs
)
In [ ]:
def _num_compounds(dataset):
    """Return the length of the first file's entry, or 0 if there are no files.

    Indexing dataset[0] directly raises IndexError when the dataset is empty.
    """
    return len(dataset[0]) if len(dataset) > 0 else 0


# Report the number of files (outer length) and compounds (length of the first
# file's entry) before and after filtering.
print('%d files and %d compounds in original dataset'%(len(metatlas_dataset), _num_compounds(metatlas_dataset)))
print('%d files and %d compounds in pass_dataset'%(len(pass_dataset), _num_compounds(pass_dataset)))
print('%d files and %d compounds in fail_dataset'%(len(fail_dataset), _num_compounds(fail_dataset)))
In [ ]:
# Reload dill2plots so that edits to the module are picked up without
# restarting the kernel.
dp = reload(dp)

# Template cell: before running, define my_atlas_df (the dataframe to store)
# and my_polarity, and fill in atlas_name. The original had the polarity value
# commented out ("polarity = #my_polarity,"), which made the whole cell a
# SyntaxError; it is now a normal name that must be defined first.
dp.make_atlas_from_spreadsheet(my_atlas_df,
                               filetype='dataframe',
                               atlas_name='',
                               polarity=my_polarity,
                               store=True,
                               mz_tolerance=20)
In [ ]: