notebook.community

Edit and run



In [1]:

    
%matplotlib notebook
import sys
import pandas as pd

#sys.path.insert(0,'/global/project/projectdirs/metatlas/anaconda/lib/python2.7/site-packages' )
import qgrid

from metatlas import metatlas_objects as metob

# df = pd.read_csv('/global/homes/b/bpb/notebooks/MetAtlas_Workflow_20151116/five things for metatlas2/201500826_library_qxqct_hilic_pos.txt', sep='\t') #if you are loading the corrected names
# df = pd.read_csv('/global/homes/b/bpb/notebooks/MetAtlas_Workflow_20151116/five things for metatlas2/201500826_library_qxqct_hilic_neg.txt', sep='\t') #if you are loading the corrected names
# df = pd.read_csv('/global/homes/b/bpb/notebooks/atlas_finfo_to_be_loaded/20151130_Atlas_POS_HILIC_LS_Validated (1).csv',sep = ',')
# df = pd.read_csv('/global/homes/b/bpb/notebooks/atlas_finfo_to_be_loaded/20151130_Atlas_NEG_HILIC_LS_Validated.csv',sep = ',')
# df = pd.read_csv('/global/homes/b/bpb/notebooks/atlas_finfo_to_be_loaded/20151130_Atlas_POS_HILIC_LS_ISTDs.csv',sep = ',')
# df = pd.read_csv('/global/homes/b/bpb/notebooks/atlas_finfo_to_be_loaded/20151130_Atlas_NEG_HILIC_LS_ISTDs.csv',sep = ',')
# df = pd.read_csv('/global/homes/b/bpb/notebooks/atlas_finfo_to_be_loaded/HMArkin_Pos_Atlas v3 MCNames (1).txt',sep = '\t')
# Spreadsheet of compound identifications; later cells read it through `df`.
atlas_csv_path = '/home/jimmy/data/atlas_finfo_to_be_loaded/20151208_Atlas_POS_HILIC_LS_Validated_RTcorr.csv'
df = pd.read_csv(atlas_csv_path, sep=',')
# df = pd.read_csv('../atlas_finfo_to_be_loaded/20151208_Atlas_NEG_HILIC_LS_Validated_RTcorr.csv',sep = ',')

# Lower-case every header so columns can be addressed as df.name, df.rt_min, ...
lowercase_headers = []
for header in df.columns:
    lowercase_headers.append(header.lower())
df.columns = lowercase_headers

# Interactive grid view of the loaded table.
qgrid.show_grid(df, precision=5)

# TODO: Make one Atlas.  
# TODO: Add column labels for each mzref, rtref, and fragmentation_ref
# TODO: Make template for this
# TODO: Make error checker to ensure Adduct is conforming to specification
# TODO: make error checker to ensure modification is conforming to specification



In [ ]:

    
# Pick the atlas that the following cells will edit.
#
# The selection is purely positional, so every candidate is listed together
# with its position and the final choice is echoed back. Check that output
# before running the cells that modify and store the atlas.
ATLAS_TO_MODIFY_INDEX = 3

# Atlases whose name matches the pattern '%_LS_%'.
# NOTE(review): username='*' is presumably a wildcard over all owners -- confirm.
fetch_atlases = metob.retrieve('Atlas', name='%_LS_%', username='*')
for atlas_index, atlas in enumerate(fetch_atlases):
    print('%d: %s' % (atlas_index, atlas.name))

# NOTE(review): the next line looks like a pasted scratch note unrelated to
# atlas selection -- confirm whether it is still needed.
# 2-acetamido-2-deoxy-beta-D-glucopyranosyl-(1->4)-2-amino-2-deoxy-D-glucopyranose Cite this Record
atlas_to_modify = fetch_atlases[ATLAS_TO_MODIFY_INDEX]
print('atlas_to_modify: %s' % atlas_to_modify.name)



In [ ]:

    
# Compounds that have the wrong compound name will be listed.
# Fix the names in the spreadsheet and keep running this until no more
# compounds are listed.
for compound_name in df['name']:
    # username='*' keeps this check consistent with the lookup used later when
    # new identifications are built from the spreadsheet.
    # NOTE(review): without it retrieve() presumably searches only the current
    # user's records, which would report false misses -- confirm.
    if not metob.retrieve('Compounds', name=compound_name, username='*'):
        print('%s is not in database' % compound_name)



In [ ]:

    
# Write the current spreadsheet table to a tab-separated file in the working directory.
snapshot_path = 'saveit.tab'
df.to_csv(snapshot_path, sep='\t')



In [ ]:

    
# Drop atlas identifications whose compound is no longer listed in the
# spreadsheet; the remaining ones keep their original order.
# Only the in-memory atlas is changed here: the store call stays disabled.

# One pass over the spreadsheet instead of rescanning it for every identification.
spreadsheet_names = set(df['name'])

keep_compound_identifications = []
for comp_id in atlas_to_modify.compound_identifications:
    # The name of the first compound attached to the identification is the match key.
    compound_name = comp_id.compound[0].name
    if compound_name in spreadsheet_names:
        keep_compound_identifications.append(comp_id)
    else:
        print('remove compound %s' % compound_name)
atlas_to_modify.compound_identifications = keep_compound_identifications
# metob.store(atlas_to_modify)



In [ ]:

    
# Synchronise the atlas with the spreadsheet, then store it:
#   * a compound already in the atlas gets the RT window (rt_min / rt_max) of
#     its first RT reference overwritten with the spreadsheet values;
#   * a compound missing from the atlas gets a new CompoundIdentification
#     built from the spreadsheet row.

# Polarity written on every newly created m/z reference.
# NOTE(review): hard-coded; it has to be changed by hand when a negative-mode
# spreadsheet is loaded.
detected_polarity = 'positive'

# NOTE(review): this is the atlas' own list, not a copy (assuming the attribute
# returns the stored list), so identifications appended below are also seen by
# the matching loop for later spreadsheet rows.
all_identifications = atlas_to_modify.compound_identifications
for row_label in df.index:
    row_name = df.loc[row_label, 'name']

    found_in_atlas = False
    for comp_id in atlas_to_modify.compound_identifications:
        if row_name == comp_id.compound[0].name:
            found_in_atlas = True
            # NOTE(review): only the window is refreshed; rt_peak of an existing
            # reference is left as it was -- confirm this is intended.
            old_rt_ref = comp_id.rt_references[0]
            old_rt_ref.rt_min = df.loc[row_label, 'rt_min']
            old_rt_ref.rt_max = df.loc[row_label, 'rt_max']
#             print df.rt_min[x], df.rt_max[x], old_rt_ref.rt_min, old_rt_ref.rt_max
    if found_in_atlas:
        continue

    print('%s new compound' % row_name)
    matching_compounds = metob.retrieve('Compounds', name=row_name, username='*')
    if not matching_compounds:
        # Fail with a message that names the row. Nothing has been stored yet
        # at this point, because the store call is the last statement.
        raise IndexError('%r is not in database; fix the spreadsheet name '
                         'before updating the atlas' % (row_name,))
    # Original author's note: all copies of the molecule are returned and
    # index 0 is the most recent one -- TODO confirm the ordering.
    c = matching_compounds[0]

    mzRef = metob.MzReference()
    # take the mz value from the spreadsheet
    mzRef.mz = df.loc[row_label, 'mz']
    # TODO: calculate the mz from theoretical adduct and modification if provided.
    #     mzRef.mz = c.MonoIsotopic_molecular_weight + 1.007276
    mzRef.mz_tolerance = df.loc[row_label, 'mz_threshold']
    mzRef.mz_tolerance_units = 'ppm'
    mzRef.detected_polarity = detected_polarity

    rtRef = metob.RtReference()
    rtRef.rt_units = 'min'
    rtRef.rt_min = df.loc[row_label, 'rt_min']
    rtRef.rt_max = df.loc[row_label, 'rt_max']
    rtRef.rt_peak = df.loc[row_label, 'rt_peak']

    new_id = metob.CompoundIdentification()
    new_id.compound = [c]
    new_id.mz_references = [mzRef]
    new_id.rt_references = [rtRef]
    all_identifications.append(new_id)

atlas_to_modify.compound_identifications = all_identifications
metob.store(atlas_to_modify)



In [ ]:

    
# #     myID.description = 'mz=%5.4f,ppm=%5.4f,RTmin=%5.4f,RTmax=%5.4f,RTpeak=%5.4f'%(mz[i],
# #                                                                                  mz_tolerance[i],
# #                                                                                  rt_min[i],
# #                                                                                  rt_max[i],
# #                                                                                  rt_peak[i])
# #     print myID.references


# all_identifications = []
# for i,c in enumerate(myCompounds):
#     mzRef = metob.MzReference()

#     # take the mz value from the spreadsheet
#     mzRef.mz = mz[i]
    
#     #TODO: calculate the mz from theoretical adduct and modification if provided.
# #     mzRef.mz = c.MonoIsotopic_molecular_weight + 1.007276
#     mzRef.mz_tolerance = mz_tolerance[i]
#     mzRef.mz_tolerance_units = 'ppm'
#     mzRef.detected_polarity = 'positive'
# #     mzRef.adduct = '[M-H]'
    
#     rtRef = metob.RtReference()
#     rtRef.rt_units = 'min'
#     rtRef.rt_min = rt_min[i]
#     rtRef.rt_max = rt_max[i]
#     rtRef.rt_peak = rt_peak[i]
    
#     myID = metob.CompoundIdentification()
#     myID.compound = [c]
#     myID.mz_references = [mzRef]
#     myID.rt_references = [rtRef]

#     all_identifications.append(myID)

# myAtlas = metob.Atlas()
# #metob.Atlas() has "compound_identifications" and a "name"
# # myAtlas.name = '20151130_LS_Negative_Hilic_QExactive_Archetypes'
# myAtlas.name = '20151210_SJ_Positive_Hilic_QTOF6550_HMArkin'
# myAtlas.compound_identifications = all_identifications
# metob.store(myAtlas)



In [ ]:

    
# NOTE(review): `myAtlas` is only assigned in the commented-out code of the
# previous cell, so on a fresh kernel this cell raises NameError unless that
# code is restored.
myAtlas



In [ ]:

    
# Fetch the atlas named below and show the m/z references of its first
# compound identification.
atlases = metob.retrieve(
    'Atlas',
    username='*',
    name='20151130_LS_Positive_Hilic_QExactive_Archetypes'
)
a = atlases[0]
first_identification = a.compound_identifications[0]
first_identification.mz_references



In [ ]:

    
# Spot-check the first compound identification of the first retrieved atlas.
first_id = atlases[0].compound_identifications[0]
print(first_id.compound[0].name)
print(first_id.mz_references[0].mz)
print(first_id.rt_references[0].rt_peak)
# print atlases[0].compound_identifications[0].references[2].RTpeak
print(first_id.description.split(','))
print(first_id.name)



In [ ]: