In [1]:
import pandas as pd
import glob
import numpy as np
import sys
import multiprocessing as mp
import os
from matplotlib import pyplot as plt
%matplotlib notebook

def group_duplicates(df,group_col,make_string=False):
    """
    takes in a list of grouping columns and turns the rest into arrays
    """
    
    all_cols = np.asarray(df.columns)
    #get the index of the grouping term as array
    idx_group = np.argwhere(all_cols == group_col).flatten()
    #get the indices of all other terms as array
    idx_list = np.argwhere(all_cols != group_col).flatten()
    cols = all_cols[idx_list]
    
    # create a sorted numpy array (sorted by column=group_col)
    a = df.sort_values(group_col).values.T

    #get the indices of the first instance of each unique identifier
    ukeys, index = np.unique(a[idx_group,:],return_index=True)

    #split the other rows of the array into separate arrays using the 
    #unique index
    arrays = np.split(a[idx_list,:],index[1:],axis=1)

    #make a list of dicts with column headings as keys;
    #every value stays an array, even when a group has only one item
    ucpds = [dict([(c,aa) for c,aa in zip(cols,a)]) for a in arrays]

    #make a dataframe from the list of dicts
    df2 = pd.DataFrame(ucpds,index=ukeys)
    
    #convert array columns to strings so the result can be written to csv/hdf
    if make_string:
        for c in cols:
            df2[c] = df2[c].apply(lambda x: str(x.tolist()))
            
    df2.index = df2.index.set_names(group_col)
    df2.reset_index(inplace=True)

    #return dataframe
    return df2
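
A toy example (a sketch) of what group_duplicates does: rows sharing a label collapse into one row whose remaining columns become numpy arrays.

In [ ]:
toy = pd.DataFrame({'label':[1,1,2],'rt':[0.1,0.2,0.5],'i':[10.,20.,5.]})
group_duplicates(toy,'label')
# label 1 -> rt array [0.1, 0.2] and i array [10., 20.]
# label 2 -> single-element arrays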

In [2]:
files = glob.glob('/project/projectdirs/metatlas/projects/cs_ffff/*_features.h5')
# files = glob.glob('/project/projectdirs/metatlas/projects/ffff/20171114_KBL_AS-JC_502921_MCom_1to3mem_QE139_ZHILIC5um_736960_POS*.h5')
df = pd.read_hdf(files[0],'ms1_summary')
# eic = group_duplicates(df[['label','rt','i','in_feature']],'label',make_string=False)
# eic.head()
df.head()


Out[2]:
mz_centroid num_datapoints peak_area peak_height rt_peak
label
1 84.960321 37.0 4.806260e+07 1.455724e+06 6.571208
6 89.507234 37.0 1.654031e+08 7.371130e+06 10.831382
7 89.940499 33.0 2.151427e+07 7.398588e+05 1.253665
13 96.027754 1.0 3.604715e+03 3.604715e+03 0.771313
32 108.044790 33.0 7.019225e+07 7.123824e+06 0.385624

Idea: you only have to build the EIC and MS/MS arrays for the file with the maximum peak height per feature? That could save a factor of num_files in time.

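A hedged sketch of that idea, assuming the merged data frame built further below (the pivot and variable names here are illustrative):

In [ ]:
ph = pd.pivot_table(data.reset_index(), values='peak_height', index=['label'], columns=['filename'], fill_value=0.)
best_file = ph.idxmax(axis=1)  # per label: the filename holding the max peak_height
# restrict the expensive grouping to rows whose label's best file is this file
flat = pd.read_hdf(files[0],'flat_ms1')
keep = flat['label'].map(best_file) == os.path.basename(files[0])
eic_max_only = group_duplicates(flat[keep][['label','rt','mz','i','in_feature']],'label')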

In [77]:
files


Out[77]:
['/project/projectdirs/metatlas/projects/cs_ffff/20180427_KBL_MO-CS_FungSM_CpxMed_S4G1_QE-HF_C18_105_POS_MSMS_4_S4-MC-RCG-R4_4_Rg80to1200-CE205060--S1_Run47_features.h5',
 '/project/projectdirs/metatlas/projects/cs_ffff/20180427_KBL_MO-CS_FungSM_CpxMed_S4G1_QE-HF_C18_105_POS_MSMS_5_G1-MC-RCG-R1_1_Rg80to1200-CE102040--S1_Run17_features.h5',
 '/project/projectdirs/metatlas/projects/cs_ffff/20180427_KBL_MO-CS_FungSM_CpxMed_S4G1_QE-HF_C18_105_POS_MSMS_10_Control-MC-RCG-R2_2_Rg80to1200-CE102040--S1_Run26_features.h5',
 '/project/projectdirs/metatlas/projects/cs_ffff/20180427_KBL_MO-CS_FungSM_CpxMed_S4G1_QE-HF_C18_105_POS_MSMS_3_S4-MC-RCG-R3_3_Rg80to1200-CE102040--S1_Run41_features.h5',
 '/project/projectdirs/metatlas/projects/cs_ffff/20180427_KBL_MO-CS_FungSM_CpxMed_S4G1_QE-HF_C18_105_POS_MSMS_9_Control-MC-RCG-R1_1_Rg80to1200-CE102040--S1_Run14_features.h5',
 '/project/projectdirs/metatlas/projects/cs_ffff/20180427_KBL_MO-CS_FungSM_CpxMed_S4G1_QE-HF_C18_105_POS_MSMS_1_S4-MC-RCG-R1_1_Rg80to1200-CE102040--S1_Run20_features.h5',
 '/project/projectdirs/metatlas/projects/cs_ffff/20180427_KBL_MO-CS_FungSM_CpxMed_S4G1_QE-HF_C18_105_POS_MSMS_6_G1-MC-RCG-R2_2_Rg80to1200-CE102040--S1_Run23_features.h5',
 '/project/projectdirs/metatlas/projects/cs_ffff/20180427_KBL_MO-CS_FungSM_CpxMed_S4G1_QE-HF_C18_105_POS_MSMS_12_Control-MC-RCG-R4_4_Rg80to1200-CE205060--S1_Run53_features.h5',
 '/project/projectdirs/metatlas/projects/cs_ffff/20180427_KBL_MO-CS_FungSM_CpxMed_S4G1_QE-HF_C18_105_POS_MSMS_2_S4-MC-RCG-R2_2_Rg80to1200-CE102040--S1_Run32_features.h5',
 '/project/projectdirs/metatlas/projects/cs_ffff/20180427_KBL_MO-CS_FungSM_CpxMed_S4G1_QE-HF_C18_105_POS_MSMS_8_G1-MC-RCG-R4_4_Rg80to1200-CE205060--S1_Run56_features.h5',
 '/project/projectdirs/metatlas/projects/cs_ffff/20180427_KBL_MO-CS_FungSM_CpxMed_S4G1_QE-HF_C18_105_POS_MSMS_11_Control-MC-RCG-R3_3_Rg80to1200-CE102040--S1_Run35_features.h5',
 '/project/projectdirs/metatlas/projects/cs_ffff/20180427_KBL_MO-CS_FungSM_CpxMed_S4G1_QE-HF_C18_105_POS_MSMS_7_G1-MC-RCG-R3_3_Rg80to1200-CE102040--S1_Run44_features.h5']

In [111]:
%%time

# goal data structure: metatlas_dataset[file_idx][compound_idx]['ms1_summary'] = dict
# rows are in the same order as the atlas

atlas = pd.read_hdf(files[0],'atlas')
atlas.set_index('label',drop=True,inplace=True)
# atlas = atlas[[]] #we only need the index as a placeholder to organize the merge

def setup_dataframe(filename):
    df = pd.read_hdf(filename,'ms1_summary')
    
    eic = pd.read_hdf(filename,'flat_ms1')
    eic = group_duplicates(eic[['label','rt','mz','i','in_feature']],'label',make_string=False).set_index('label',drop=True)
    eic.columns = ['eic_%s'%c for c in eic.columns]
    df = pd.merge(df,eic,how='outer',left_index=True,right_index=True)
    
    msms = pd.read_hdf(filename,'flat_ms2')
    msms = group_duplicates(msms[['label','mz','i','rt','in_feature']],'label',make_string=False).set_index('label',drop=True)
    msms.columns = ['msms_%s'%c for c in msms.columns]
    df = pd.merge(df,msms,left_index=True,right_index=True,how='left')
#         df = pd.merge(atlas,msms,left_index=True,right_index=True,how='left')

    df['filename'] = os.path.basename(filename)
    return df

# df = setup_dataframe(files[0])
pool = mp.Pool(processes=12)
data = pool.map(setup_dataframe,files)
pool.close()
pool.join()
data = pd.concat(data)
data.index.name = 'label'

# timing notes from separate runs:
# never finishes with 40 cores
# 7 min 43 s with 20 cores
# 19 min 14 s with 5 cores


CPU times: user 575 ms, sys: 675 ms, total: 1.25 s
Wall time: 1.6 s

In [112]:
data.head()


Out[112]:
mz_centroid num_datapoints peak_area peak_height rt_peak eic_i eic_in_feature eic_mz eic_rt msms_i msms_in_feature msms_mz msms_rt filename
label
1 84.960321 37.0 4.806260e+07 1.455724e+06 6.571208 [1192286.875, 1371415.125, 1260008.0, 1212800.... [True, True, True, True, True, True, True, Tru... [84.9603347778, 84.9603118896, 84.9603271484, ... [6.56609296799, 6.51328706741, 6.51810359955, ... [99598.4921875, 1542.93115234, 6641.25439453, ... [True, True, True, True, True, True, True, Tru... [84.9602966309, 84.9602966309, 84.9602966309, ... [6.62827205658, 6.62827205658, 6.62827205658, ... 20180427_KBL_MO-CS_FungSM_CpxMed_S4G1_QE-HF_C1...
6 89.507234 37.0 1.654031e+08 7.371130e+06 10.831382 [5465512.0, 6092759.5, 5921025.5, 5720820.0, 5... [True, True, True, True, True, True, True, Tru... [89.5072402954, 89.5072402954, 89.5072250366, ... [10.8871250153, 10.8815727234, 10.8760528564, ... [905781.75, 42632.7148438, 4903.70458984, 3684... [True, True, True, True, True, True, True, Tru... [89.5072021484, 89.5072021484, 89.5072021484, ... [10.9332647324, 10.9332647324, 10.9332647324, ... 20180427_KBL_MO-CS_FungSM_CpxMed_S4G1_QE-HF_C1...
7 89.940499 33.0 2.151427e+07 7.398588e+05 1.253665 [619433.75, 618308.9375, 659818.3125, 645762.1... [True, True, True, True, True, True, True, Tru... [89.9404830933, 89.9404754639, 89.9404907227, ... [1.20521366596, 1.2112865448, 1.21732211113, 1... [1598.48669434, 1865.84631348, 2096.5, 1449.68... [True, True, True, True, True, True, True, Tru... [89.9404983521, 89.9404983521, 89.9404983521, ... [1.19687259197, 1.19687259197, 1.19687259197, ... 20180427_KBL_MO-CS_FungSM_CpxMed_S4G1_QE-HF_C1...
13 96.027754 1.0 3.604715e+03 3.604715e+03 0.771313 [3604.71533203] [True] [96.0277557373] [0.771312832832] NaN NaN NaN NaN 20180427_KBL_MO-CS_FungSM_CpxMed_S4G1_QE-HF_C1...
32 108.044790 33.0 7.019225e+07 7.123824e+06 0.385624 [4837.21191406, 3779.58032227, 2097595.75, 205... [True, True, True, True, True, True, True, Tru... [108.044784546, 108.044815063, 108.044868469, ... [0.30367693305, 0.297187328339, 0.427196621895... [27090.4570312, 5315.66699219, 4353.57421875, ... [True, True, True, True, True, True, True, Tru... [108.044799805, 108.044799805, 108.044799805, ... [0.388961911201, 0.388961911201, 0.38896191120... 20180427_KBL_MO-CS_FungSM_CpxMed_S4G1_QE-HF_C1...

In [126]:
# note: peak_height is the pivot table built in the next cell; this assumes it already exists from an earlier run
temp = peak_height.T.reset_index()
def get_group_name(filename):
    if '_Control-' in filename:
        return 'Mean_Control'
    elif '_S4-' in filename:
        return 'Mean_S4'
    elif '_G1-' in filename:
        return 'Mean_G1'
    
temp['filename'] = temp['filename'].apply(get_group_name)
mean_groups = temp.groupby('filename',axis=0).mean().T
# the +1.0 pseudocount keeps features absent from a group (mean 0) from producing log2(0);
# e.g. label 5 below: log2(1/(1+2289.487)) ~ -11.16
mean_groups['S4_log2'] = np.log2((mean_groups['Mean_S4']+1.0)/(1.0 + mean_groups['Mean_Control']))
mean_groups['G1_log2'] = np.log2((mean_groups['Mean_G1']+1.0)/(1.0 + mean_groups['Mean_Control']))
mean_groups.head()


Out[126]:
filename Mean_Control Mean_G1 Mean_S4 S4_log2 G1_log2
label
1 1.366927e+06 1.476195e+06 1.443770e+06 0.078905 0.110947
5 2.289487e+03 0.000000e+00 0.000000e+00 -11.161439 -11.161439
6 5.849534e+06 6.193266e+06 6.659633e+06 0.187121 0.082379
7 7.414602e+05 7.211796e+05 7.221202e+05 -0.038130 -0.040011
12 8.817485e+03 9.460507e+03 4.139517e+03 -1.090720 0.101539

In [127]:
peak_height = pd.pivot_table(data.reset_index(), values='peak_height',index=['label'], columns=['filename'],fill_value=0.)
temp = peak_height.T.reset_index()
def get_group_name(filename):
    if '_Control-' in filename:
        return 'Mean_Control'
    elif '_S4-' in filename:
        return 'Mean_S4'
    elif '_G1-' in filename:
        return 'Mean_G1'
    
temp['filename'] = temp['filename'].apply(get_group_name)
mean_groups = temp.groupby('filename',axis=0).mean().T
mean_groups['S4_log2'] = np.log2((mean_groups['Mean_S4']+1.0)/(1.0 + mean_groups['Mean_Control']))
mean_groups['G1_log2'] = np.log2((mean_groups['Mean_G1']+1.0)/(1.0 + mean_groups['Mean_Control']))
peak_height = peak_height.merge(mean_groups,how='outer',left_index=True,right_index=True)
peak_height = peak_height.merge(atlas,how='outer',left_index=True,right_index=True)
peak_height.to_csv('/global/homes/b/bpb/Downloads/cs_ffff_peak_height_7.csv')
peak_height.head()


Out[127]:
20180427_KBL_MO-CS_FungSM_CpxMed_S4G1_QE-HF_C18_105_POS_MSMS_10_Control-MC-RCG-R2_2_Rg80to1200-CE102040--S1_Run26_features.h5 20180427_KBL_MO-CS_FungSM_CpxMed_S4G1_QE-HF_C18_105_POS_MSMS_11_Control-MC-RCG-R3_3_Rg80to1200-CE102040--S1_Run35_features.h5 20180427_KBL_MO-CS_FungSM_CpxMed_S4G1_QE-HF_C18_105_POS_MSMS_12_Control-MC-RCG-R4_4_Rg80to1200-CE205060--S1_Run53_features.h5 20180427_KBL_MO-CS_FungSM_CpxMed_S4G1_QE-HF_C18_105_POS_MSMS_1_S4-MC-RCG-R1_1_Rg80to1200-CE102040--S1_Run20_features.h5 20180427_KBL_MO-CS_FungSM_CpxMed_S4G1_QE-HF_C18_105_POS_MSMS_2_S4-MC-RCG-R2_2_Rg80to1200-CE102040--S1_Run32_features.h5 20180427_KBL_MO-CS_FungSM_CpxMed_S4G1_QE-HF_C18_105_POS_MSMS_3_S4-MC-RCG-R3_3_Rg80to1200-CE102040--S1_Run41_features.h5 20180427_KBL_MO-CS_FungSM_CpxMed_S4G1_QE-HF_C18_105_POS_MSMS_4_S4-MC-RCG-R4_4_Rg80to1200-CE205060--S1_Run47_features.h5 20180427_KBL_MO-CS_FungSM_CpxMed_S4G1_QE-HF_C18_105_POS_MSMS_5_G1-MC-RCG-R1_1_Rg80to1200-CE102040--S1_Run17_features.h5 20180427_KBL_MO-CS_FungSM_CpxMed_S4G1_QE-HF_C18_105_POS_MSMS_6_G1-MC-RCG-R2_2_Rg80to1200-CE102040--S1_Run23_features.h5 20180427_KBL_MO-CS_FungSM_CpxMed_S4G1_QE-HF_C18_105_POS_MSMS_7_G1-MC-RCG-R3_3_Rg80to1200-CE102040--S1_Run44_features.h5 ... Mean_G1 Mean_S4 S4_log2 G1_log2 mz rt_peak inchi_key rt_min rt_max group_index
label
1 1311738.00 1502384.250 1.346962e+06 1.400423e+06 1.424484e+06 1.494449e+06 1455724.00 1.390986e+06 1.481474e+06 1.568711e+06 ... 1.476195e+06 1.443770e+06 0.078905 0.110947 84.960 6.604186 NaN 6.504186 6.704186 0
5 0.00 0.000 9.157949e+03 0.000000e+00 0.000000e+00 0.000000e+00 0.00 0.000000e+00 0.000000e+00 0.000000e+00 ... 0.000000e+00 0.000000e+00 -11.161439 -11.161439 88.037 5.812425 NaN 5.712425 5.912425 1
6 5769812.00 6494037.500 6.311342e+06 5.994042e+06 6.357763e+06 6.915598e+06 7371130.50 5.520660e+06 5.924286e+06 7.023860e+06 ... 6.193266e+06 6.659633e+06 0.187121 0.082379 89.507 10.930367 NaN 10.830367 11.030367 2
7 725063.25 643727.875 7.733621e+05 7.694987e+05 7.364027e+05 6.427208e+05 739858.75 7.811588e+05 7.521926e+05 6.360627e+05 ... 7.211796e+05 7.221202e+05 -0.038130 -0.040011 89.941 1.240956 NaN 1.140956 1.340956 3
8 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN 90.622 5.861404 NaN 5.761404 5.961404 4

5 rows × 23 columns


In [ ]:
a = data.reset_index().values
labels, row_pos = np.unique(a[:, 0], return_inverse=True) #these are feature labels
basenames, col_pos = np.unique(a[:, -1], return_inverse=True) #these are file basenames

pivot_table = np.zeros((len(labels), len(basenames), 5), dtype=float)
# columns 1-5 are the scalar ms1_summary values: mz_centroid, num_datapoints, peak_area, peak_height, rt_peak
pivot_table[row_pos, col_pos] = a[:, [1,2,3,4,5]]
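
A quick sanity check (a sketch): within the [1,2,3,4,5] selection, peak_height sits at position 3, so that slice of the numpy pivot should match a pandas pivot of the same values. ph_np and ph_pd are illustrative names.

In [ ]:
ph_np = pd.DataFrame(pivot_table[:, :, 3], index=labels, columns=basenames)
ph_pd = pd.pivot_table(data.reset_index(), values='peak_height', index=['label'], columns=['filename'], fill_value=0.)
print(np.allclose(ph_np.values, ph_pd.values))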

In [ ]:
# columns 6-9 hold the per-feature arrays: eic_i, eic_in_feature, eic_mz, eic_rt
eic_table = np.zeros((len(labels), len(basenames), 4), dtype=object)
eic_table[row_pos, col_pos] = a[:, [6,7,8,9]]

In [ ]:
eic_table[0,0]

In [ ]:
data.index.name = 'label'
peak_height = pd.pivot_table(data.reset_index(), values='peak_height',index=['label'], columns=['filename'],fill_value=None)
peak_height.head()

In [ ]:
#20 seconds to do 64 of them

# metatlas_dataset[file_idx][compound_idx]['ms1_summary'] = dict
# rows are same order as atlas
metatlas_dataset = []
atlas = pd.read_hdf(files[0],'atlas')
atlas.set_index('label',drop=True,inplace=True)
atlas = atlas[[]] #we only need the index as a placeholder to organize the merge

def make_data_for_metatlas_dataset(f):
    df = pd.read_hdf(f,'ms1_summary')
    ms1 = pd.merge(atlas,df,left_index=True,right_index=True,how='left').to_dict('records')
    
    df = pd.read_hdf(f,'flat_ms1')
    eic = group_duplicates(df[['label','rt','mz','i','in_feature']],'label',make_string=False).set_index('label',drop=True)
    eic = pd.merge(atlas,eic,left_index=True,right_index=True,how='left').to_dict('records')

    df = pd.read_hdf(f,'flat_ms2')
    msms = group_duplicates(df[['label','mz','i','rt','in_feature']],'label',make_string=False).set_index('label',drop=True)
    msms = pd.merge(atlas,msms,left_index=True,right_index=True,how='left').to_dict('records')

    return [{'ms1_summary':m,'file':f,'msms':msms[j],'eic':eic[j]} for j,m in enumerate(ms1)]
    
# pool = mp.Pool(processes=10)
# data = pool.map(make_data_for_metatlas_dataset,files)
# pool.close()
# pool.join()
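
A sketch of indexing the nested result once the commented-out pool.map above is run (here built serially on just the first file for illustration):

In [ ]:
metatlas_dataset = [make_data_for_metatlas_dataset(f) for f in files[:1]]
entry = metatlas_dataset[0][0]   # first file, first compound in atlas order
print(entry['file'])
print(entry['ms1_summary'])      # dict of scalars: mz_centroid, peak_height, ...
print(entry['eic'].keys())       # arrays: rt, mz, i, in_feature (NaN if no points)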

In [ ]:
df = pd.read_hdf(files[0],'flat_ms2')
msms = group_duplicates(df[['label','mz','i','rt','in_feature']],'label',make_string=False).set_index('label',drop=True)
msms.head(100)

In [ ]:
# %%time
df = pd.read_hdf(files[0],'flat_ms1')
eic = group_duplicates(df[['label','rt','mz','i','in_feature']],'label',make_string=False).set_index('label',drop=True)
eic.head()
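
A sketch of plotting one EIC from the grouped frame above (the feature choice and the sort are illustrative):

In [ ]:
row = eic.iloc[0]
order = np.argsort(row['rt'])
plt.figure()
plt.plot(np.asarray(row['rt'],dtype=float)[order], np.asarray(row['i'],dtype=float)[order])
plt.xlabel('retention time')
plt.ylabel('intensity')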