In [5]:
# Notebook setup: imports, plotting backend, and a check of the active database.
import sys
# Optional override (disabled): put the shared project site-packages first on sys.path.
#sys.path.insert(0,'/global/project/projectdirs/metatlas/anaconda/lib/python2.7/site-packages' )
from metatlas import metatlas_objects as metob
import qgrid
# Use matplotlib's interactive "notebook" backend for figures.
%matplotlib notebook
import pandas as pd
# Show which database metob is connected to before querying or storing anything.
print metob.database


<Database(mysql+pymysql://localhost/metatlas)>

In [14]:
# List the names of all atlases whose name matches the pattern '%_KZ_%'.
# NOTE(review): '%' looks like a SQL-style wildcard and username='*' presumably
# lifts the per-user restriction -- confirm against metob.retrieve.
atlas = metob.retrieve('Atlas', name='%_KZ_%', username='*')

# One entry per retrieved atlas, in retrieval order; repeated names are kept.
atlase_names = [entry.name for entry in atlas]

# Single-argument print(...) behaves the same under Python 2 and Python 3.
print(atlase_names)


[u'20151016_KZ_Qexactive_Hilic_Avena_exudates_pos', u'20150826_KZ_Qexactive_Hilic_Avena_exudates_neg', u'20151016_KZ_Negative_Hilic_QExactive_AvenaExudates', u'20151016_KZ_Positive_Hilic_QExactive_AvenaExudates', u'20151023_KZ_Positive_Hilic_QExactive_AvenaExudates', u'20151023_KZ_Positive_Hilic_QExactive_AvenaExudates', u'20151023_KZ_Positive_Hilic_QExactive_AvenaExudates', u'20151023_KZ_Positive_Hilic_QExactive_AvenaExudates', u'20151023_KZ_Positive_Hilic_QExactive_AvenaExudates', u'20151023_KZ_Positive_Hilic_QExactive_AvenaExudates', u'20151023_KZ_Positive_Hilic_QExactive_AvenaExudates_2', u'20151023_KZ_Negative_Hilic_QExactive_AvenaExudates_2']

In [3]:
# Make the file-info template sheet by selecting a folder (experiment).
# Example of the folders available for one user:
# bpb@edison06:/project/projectdirs/metatlas/raw_data/lpsilva> ls -lta
# total 3584
# drwxrws--- 17 silvest metatlas 131072 Oct 11 10:18 ..
# drwxrws---  2 silvest metatlas 131072 Sep 29 15:30 Actinobacillus test
# drwxrws---  2 silvest metatlas 262144 Sep 29 14:13 20150521_LPSilva_Actino_C18_NEG_51isolates
# drwxrws---  2 silvest metatlas 262144 Sep 29 13:43 20150514_LPSilva_Actino_C18_POS_51isolates
# drwxrws---  2 silvest metatlas 262144 Sep 29 13:09 20150512_LPSilva_Actino_HILIC_NEG_51isolates
# drwxrws---  2 silvest metatlas 262144 Sep 29 11:46 20150504_LPSilva_Actino_HILIC_POS_51isolates
import numpy as np

# Alternative queries that select runs by mzML path pattern instead of experiment:
# files = metob.retrieve('LcmsRun',mzml_file='%katezh/20151016_%')
# files = metob.retrieve('LcmsRun',mzml_file='%_LPSilva_Actino_%_51isolates%', username='*')
files = metob.retrieve(
    'LcmsRun',
    experiment='20160203_KBL-BC_Root-Exudate_Hilic_QExactive_Trial-Run',
    username='*'
)

# np.unique drops repeated paths and returns them sorted.
flist = np.unique([run.mzml_file for run in files])
print(len(flist))


0

In [14]:
# Display the first retrieved LcmsRun record to see which fields it carries.
files[0]


Out[14]:
{'creation_time': '2015-12-14T19:43:22',
 'description': u'Human Microbiome- Arkin Lab SJ_HM_6550_151018_EL0A_Neg_098.mzML',
 'experiment': u'Human Microbiome- Arkin Lab',
 'hdf5_file': u'/global/project/projectdirs/metatlas/raw_data/sjenkins/Human Microbiome- Arkin Lab/SJ_HM_6550_151018_EL0A_Neg_098.h5',
 'head_id': u'a2099fd30e244e7980dab93317131129',
 'last_modified': '2015-12-14T19:50:15',
 'method': None,
 'mzml_file': u'/global/project/projectdirs/metatlas/raw_data/sjenkins/Human Microbiome- Arkin Lab/SJ_HM_6550_151018_EL0A_Neg_098.mzML',
 'name': u'SJ_HM_6550_151018_EL0A_Neg_098.mzML',
 'prev_uid': u'origin',
 'sample': None,
 'unique_id': u'a2099fd30e244e7980dab93317131129',
 'username': u'pasteur'}

In [4]:
# Dump all the files to a tab-separated spreadsheet with empty "group" and
# "description" columns; download it and make a filled-in copy.
with open('FileInfo_Sheet_Unique_20160203_KBL-BC_Root-Exudate_Hilic_QExactive_Trial-Run.tab','w') as sheet:
    # Header row first, then one row per mzML path.
    sheet.write('mzml_file\tgroup\tdescription\n')
    sheet.writelines('%s\t\t\n' % path for path in flist)

In [5]:
# Download the file-info sheet, fill in its "group" and "description" columns,
# then load the filled-in copy back into a DataFrame.
sheet_path = 'FileInfo_Sheet_Unique_20160203_KBL-BC_Root-Exudate_Hilic_QExactive_Trial-Run.tab'
df = pd.read_csv(sheet_path, sep='\t')
df


Out[5]:
mzml_file group description
0 /global/project/projectdirs/metatlas/raw_data/... 20160203_KBL-BC_Root-Exudate_Hilic_QExactive_T... NaN
1 /global/project/projectdirs/metatlas/raw_data/... 20160203_KBL-BC_Root-Exudate_Hilic_QExactive_T... NaN

In [6]:
# Bucket the sheet rows by the value in their "group" column.
grouped = df.groupby('group')
# list(grouped)  # uncomment to inspect the (name, rows) pairs

In [16]:
# Show the distinct group names found in the sheet.
grouped.groups.keys()


Out[16]:
['20160126_KBL_PS_plant_violacein_VioStdpt00001ugml',
 '20160126_KBL_PS_plant_violacein_VioStdpt00005ugml']

In [17]:
# metob.retrieve('LcmsRun',mzml_file='%0150115_pHILIC_NEG_MSMS_R2A-1An%',username='*')

In [7]:
# Create and store one metatlas Group per distinct "group" value in the sheet.
#
# Every group is fully built before anything is stored, so a sheet row whose
# file cannot be found aborts the cell before any group is written.
# NOTE(review): re-running this cell presumably stores the same groups a second
# time -- confirm before re-executing.
new_groups = []
for group_name, indices in grouped.groups.items():
    myGroup = metob.Group()
    myGroup.name = '%s' % group_name

    # The description comes from the group's first row. pandas reads a blank
    # cell as NaN, which is not a usable description, so in that case the
    # Group keeps whatever description it was constructed with.
    description = df.loc[indices[0], 'description']
    if pd.notnull(description):
        myGroup.description = description

    file_set = []
    for i in indices:
        mzml_file = df.loc[i, 'mzml_file']
        # The leading '%' is prepended to the path before it is used as the
        # query pattern (presumably a wildcard -- confirm against metob.retrieve).
        matches = metob.retrieve('LcmsRun', mzml_file='%%%s' % mzml_file, username='*')
        if len(matches) == 0:
            # Same exception type as the bare [0] lookup would raise, but it
            # names the sheet row that needs fixing.
            raise IndexError('no LcmsRun found for mzml_file %r (group %r)'
                             % (mzml_file, group_name))
        file_set.append(matches[0])
    myGroup.items = file_set
    new_groups.append(myGroup)

# Only reached when every file in every group was resolved.
for myGroup in new_groups:
    metob.store(myGroup)

In [19]:
# Print how many groups match, then each group's name followed by the names of
# the items it contains, with a " " line after every group.
# NOTE(review): unlike the other retrieve calls in this notebook, no
# username='*' is passed here -- confirm that is intended.
g = metob.retrieve('Groups', name='%_KBL_PS_%')
print(len(g))
for group in g:
    print(group.name)
    for run in group.items:
        print(run.name)
    print(" ")


20
ACN_50mmAgC18_QExactive_20160126_KBL_PS_plant_violacein_VioStdpt1ugml
20150819_C18_ACN_POS_MSMS_Vio_pt1ugmL.mzML
 
ACN_50mmAgC18_QExactive_20160126_KBL_PS_plant_violacein_VioStdpt05ugml
20150819_C18_ACN_POS_MSMS_Vio_pt05ugmL.mzML
 
ACN_50mmAgC18_QExactive_20160126_KBL_PS_plant_violacein_WT
20141107_1_WT_C18_POS_MSMS_ACN_vioplant.mzML
 
ACN_50mmAgC18_QExactive_20160126_KBL_PS_plant_violacein_VioStd5ugml
20150819_C18_ACN_POS_MSMS_Vio_5ugmL.mzML
 
ACN_50mmAgC18_QExactive_20160126_KBL_PS_plant_violacein_VioStdpt005ugml
20150819_C18_ACN_POS_MSMS_Vio_pt005ugmL.mzML
 
ACN_50mmAgC18_QExactive_20160126_KBL_PS_plant_violacein_VioTrp5
20141107_4_VioTrp5_C18_POS_MSMS_ACN_vioplant.mzML
 
ACN_50mmAgC18_QExactive_20160126_KBL_PS_plant_violacein_Vio
20141107_3_vio_C18_POS_MSMS_ACN_vioplant.mzML
 
ACN_50mmAgC18_QExactive_20160126_KBL_PS_plant_violacein_VioStd10ugml
20150819_C18_ACN_POS_MSMS_Vio_10ugmL.mzML
 
ACN_50mmAgC18_QExactive_20160126_KBL_PS_plant_violacein_VioStdpt001ugml
20150819_C18_ACN_POS_MSMS_Vio_pt001ugmL.mzML
 
ACN_50mmAgC18_QExactive_20160126_KBL_PS_plant_violacein_VioStdpt5ugml
20150819_C18_ACN_POS_MSMS_Vio_pt5ugmL.mzML
 
ACN_50mmAgC18_QExactive_20160126_KBL_PS_plant_violacein_VioStd1ugml
20150819_C18_ACN_POS_MSMS_Vio_1ugmL.mzML
 
ACN_50mmAgC18_QExactive_20160126_KBL_PS_plant_violacein_VioPlantEx
20150819_C18_ACN_POS_MSMS_VioPlant_1_Aug.mzML
20150819_C18_ACN_POS_MSMS_VioPlant_2_Aug.mzML
20150819_C18_ACN_POS_MSMS_VioPlant_3_Aug.mzML
20150819_C18_ACN_POS_MSMS_VioPlant_4_Aug.mzML
20150819_C18_ACN_POS_MSMS_VioPlant_5_Aug.mzML
 
ACN_50mmAgC18_QExactive_20160126_KBL_PS_plant_violacein_VioStdpt01ugml
20150819_C18_ACN_POS_MSMS_Vio_pt01ugmL.mzML
 
ACN_50mmAgC18_QExactive_20160126_KBL_PS_plant_violacein_OldVioPlantEx
20150819_C18_ACN_POS_MSMS_VioPlant_6_Aug.mzML
 
ACN_50mmAgC18_QExactive_20160126_KBL_PS_plant_violacein_OldWTPlantEx
20150819_C18_ACN_POS_MSMS_VioPlant_7_Aug.mzML
 
ACN_50mmAgC18_QExactive_20160126_KBL_PS_plant_violacein_Trp5
20141107_2_trp5_C18_POS_MSMS_ACN_vioplant.mzML
 
ACN_50mmAgC18_QExactive_20160126_KBL_PS_plant_violacein_VioStdpt0001ugml
20150819_C18_ACN_POS_MSMS_Vio_pt0001ugmL.mzML
 
ACN_50mmAgC18_QExactive_20160126_KBL_PS_plant_violacein_VioStdpt0005ugml
20150819_C18_ACN_POS_MSMS_Vio_pt0005ugmL.mzML
 
ACN_50mmAgC18_QExactive_20160126_KBL_PS_plant_violacein_VioStdpt00001ugml
20150819_C18_ACN_POS_MSMS_Vio_pt00001ugmL.mzML
 
ACN_50mmAgC18_QExactive_20160126_KBL_PS_plant_violacein_VioStdpt00005ugml
20150819_C18_ACN_POS_MSMS_Vio_pt00005ugmL.mzML
 

In [ ]:


In [ ]: