In [1]:
%matplotlib notebook
import sys
import pandas as pd
#sys.path.insert(0,'/global/project/projectdirs/metatlas/anaconda/lib/python2.7/site-packages' )
import qgrid
from metatlas import metatlas_objects as metob
# df = pd.read_csv('/global/homes/b/bpb/notebooks/MetAtlas_Workflow_20151116/five things for metatlas2/201500826_library_qxqct_hilic_pos.txt', sep='\t') #if you are loading the corrected names
# df = pd.read_csv('/global/homes/b/bpb/notebooks/MetAtlas_Workflow_20151116/five things for metatlas2/201500826_library_qxqct_hilic_neg.txt', sep='\t') #if you are loading the corrected names
# df = pd.read_csv('/global/homes/b/bpb/notebooks/atlas_finfo_to_be_loaded/20151130_Atlas_POS_HILIC_LS_Validated (1).csv',sep = ',')
# df = pd.read_csv('/global/homes/b/bpb/notebooks/atlas_finfo_to_be_loaded/20151130_Atlas_NEG_HILIC_LS_Validated.csv',sep = ',')
# df = pd.read_csv('/global/homes/b/bpb/notebooks/atlas_finfo_to_be_loaded/20151130_Atlas_POS_HILIC_LS_ISTDs.csv',sep = ',')
# df = pd.read_csv('/global/homes/b/bpb/notebooks/atlas_finfo_to_be_loaded/20151130_Atlas_NEG_HILIC_LS_ISTDs.csv',sep = ',')
# df = pd.read_csv('/global/homes/b/bpb/notebooks/atlas_finfo_to_be_loaded/HMArkin_Pos_Atlas v3 MCNames (1).txt',sep = '\t')
# Comma-separated atlas spreadsheet that the rest of this notebook works from.
atlas_csv_path = '/home/jimmy/data/atlas_finfo_to_be_loaded/20151208_Atlas_POS_HILIC_LS_Validated_RTcorr.csv'
df = pd.read_csv(atlas_csv_path, sep=',')
# df = pd.read_csv('../atlas_finfo_to_be_loaded/20151208_Atlas_NEG_HILIC_LS_Validated_RTcorr.csv',sep = ',')

# Lower-case every header; later cells address the columns as df.name, df.rt_min, etc.
df.columns = [header.lower() for header in df.columns]

# Display the loaded table with qgrid.
qgrid.show_grid(df, precision=5)
# TODO: Make one Atlas.
# TODO: Add column labels for each mzref, rtref, and fragmentation_ref
# TODO: Make template for this
# TODO: Make error checker to ensure Adduct is conforming to specification
# TODO: make error checker to ensure modification is conforming to specification
In [ ]:
# Retrieve the atlases with name='%_LS_%' (any username) and list each one next to
# its position in the result, so the index used below can be checked against the
# printed output instead of being counted by hand.
fetch_atlases = metob.retrieve('Atlas', name='%_LS_%', username='*')
for atlas_position, candidate_atlas in enumerate(fetch_atlases):
    print('%d: %s' % (atlas_position, candidate_atlas.name))
# 2-acetamido-2-deoxy-beta-D-glucopyranosyl-(1->4)-2-amino-2-deoxy-D-glucopyranose Cite this Record

# Position, in the listing printed above, of the atlas that the following cells edit.
# NOTE(review): the ordering of the retrieve result is not visible in this notebook --
# confirm the name printed at this position before running the cells that modify it.
ATLAS_INDEX = 3
atlas_to_modify = fetch_atlases[ATLAS_INDEX]
In [ ]:
# Compounds whose spreadsheet name cannot be found in the database are listed here.
# Keep running this (after correcting the names) until no more compounds are listed.
# The lookup passes username='*', the same filter used by the retrieve call that later
# fetches the compound for a new identification, so this check and that lookup are
# made with identical arguments.
for compound_name in df['name']:
    if not metob.retrieve('Compounds', name=compound_name, username='*'):
        print('%s is not in database' % compound_name)
In [ ]:
# Write the current table to 'saveit.tab' using a tab as the field separator.
df.to_csv(path_or_buf='saveit.tab', sep='\t')
In [ ]:
# Keep only the identifications whose first compound's name appears in the
# spreadsheet; every identification that is dropped is reported.
# The pruned list is assigned on the in-memory atlas only -- the store call at the
# end of this cell is left disabled.
spreadsheet_names = set(df['name'])  # one pass over the sheet instead of a scan per identification

keep_compound_identifications = []
for comp_id in atlas_to_modify.compound_identifications:
    compound_name = comp_id.compound[0].name
    if compound_name in spreadsheet_names:
        # the compound was found
        keep_compound_identifications.append(comp_id)
    else:
        print('remove compound %s' % compound_name)

atlas_to_modify.compound_identifications = keep_compound_identifications
# metob.store(atlas_to_modify)
In [ ]:
# Bring the atlas in line with the spreadsheet and store it:
#   * a row whose name matches existing identifications overwrites rt_min/rt_max on
#     the first RT reference of every match;
#   * a row with no match is added as a new CompoundIdentification built from the row.

# Polarity written on the m/z reference of every new identification. The spreadsheet
# file name used in the first cell contains 'POS'; change this together with the file.
DETECTED_POLARITY = 'positive'

all_identifications = atlas_to_modify.compound_identifications
for x in df.index:
    row_name = df.name[x]

    # Update every existing identification that carries this compound name.
    found_existing = False
    for comp_id in atlas_to_modify.compound_identifications:
        if comp_id.compound[0].name != row_name:
            continue
        found_existing = True
        old_rt_ref = comp_id.rt_references[0]
        old_rt_ref.rt_min = df.rt_min[x]
        old_rt_ref.rt_max = df.rt_max[x]
        # NOTE(review): rt_peak of an existing reference is left untouched, while new
        # identifications below take rt_peak from the spreadsheet -- confirm intended.
        # print df.rt_min[x], df.rt_max[x], old_rt_ref.rt_min, old_rt_ref.rt_max
    if found_existing:
        continue

    # No identification has this name yet: build one from the spreadsheet row.
    print('%s new compound' % row_name)
    new_id = metob.CompoundIdentification()
    candidates = metob.retrieve('Compounds', name=row_name, username='*')
    if not candidates:
        # Fail with the offending name instead of a bare IndexError on [0].
        raise LookupError(
            'compound %r is not in the database; correct its name in the '
            'spreadsheet before updating the atlas' % (row_name,))
    c = candidates[0]  # currently, all copies of the molecule are returned. The 0 is the most recent one.

    mzRef = metob.MzReference()
    # take the mz value from the spreadsheet
    mzRef.mz = df.mz[x]
    # TODO: calculate the mz from theoretical adduct and modification if provided.
    # mzRef.mz = c.MonoIsotopic_molecular_weight + 1.007276
    mzRef.mz_tolerance = df.mz_threshold[x]
    mzRef.mz_tolerance_units = 'ppm'
    mzRef.detected_polarity = DETECTED_POLARITY

    rtRef = metob.RtReference()
    rtRef.rt_units = 'min'
    rtRef.rt_min = df.rt_min[x]
    rtRef.rt_max = df.rt_max[x]
    rtRef.rt_peak = df.rt_peak[x]

    new_id.compound = [c]
    new_id.mz_references = [mzRef]
    new_id.rt_references = [rtRef]
    all_identifications.append(new_id)

atlas_to_modify.compound_identifications = all_identifications
metob.store(atlas_to_modify)
In [ ]:
# # myID.description = 'mz=%5.4f,ppm=%5.4f,RTmin=%5.4f,RTmax=%5.4f,RTpeak=%5.4f'%(mz[i],
# # mz_tolerance[i],
# # rt_min[i],
# # rt_max[i],
# # rt_peak[i])
# # print myID.references
# all_identifications = []
# for i,c in enumerate(myCompounds):
# mzRef = metob.MzReference()
# # take the mz value from the spreadsheet
# mzRef.mz = mz[i]
# #TODO: calculate the mz from theoretical adduct and modification if provided.
# # mzRef.mz = c.MonoIsotopic_molecular_weight + 1.007276
# mzRef.mz_tolerance = mz_tolerance[i]
# mzRef.mz_tolerance_units = 'ppm'
# mzRef.detected_polarity = 'positive'
# # mzRef.adduct = '[M-H]'
# rtRef = metob.RtReference()
# rtRef.rt_units = 'min'
# rtRef.rt_min = rt_min[i]
# rtRef.rt_max = rt_max[i]
# rtRef.rt_peak = rt_peak[i]
# myID = metob.CompoundIdentification()
# myID.compound = [c]
# myID.mz_references = [mzRef]
# myID.rt_references = [rtRef]
# all_identifications.append(myID)
# myAtlas = metob.Atlas()
# #metob.Atlas() has "compound_identifications" and a "name"
# # myAtlas.name = '20151130_LS_Negative_Hilic_QExactive_Archetypes'
# myAtlas.name = '20151210_SJ_Positive_Hilic_QTOF6550_HMArkin'
# myAtlas.compound_identifications = all_identifications
# metob.store(myAtlas)
In [ ]:
# Display the atlas object.
# NOTE(review): in the visible notebook `myAtlas` is only assigned inside the
# commented-out cell above, so on a fresh kernel this line raises NameError unless
# that cell is re-enabled and run first.
myAtlas
In [ ]:
# Retrieve the atlases stored under this name (any username) and take the first one.
atlases = metob.retrieve(
    'Atlas',
    name='20151130_LS_Positive_Hilic_QExactive_Archetypes',
    username='*'
)
a = atlases[0]

# Show the m/z references of that atlas's first compound identification.
a.compound_identifications[0].mz_references
In [ ]:
# Print selected fields of the first compound identification of the first atlas
# returned by the retrieve call in the previous cell.
first_identification = atlases[0].compound_identifications[0]
print(first_identification.compound[0].name)
print(first_identification.mz_references[0].mz)
print(first_identification.rt_references[0].rt_peak)
# print atlases[0].compound_identifications[0].references[2].RTpeak
print(first_identification.description.split(','))
print(first_identification.name)
In [ ]: