In [1]:
import time
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import numpy as np
from sklearn import preprocessing
from sklearn.ensemble import RandomForestClassifier
# NOTE: this notebook uses the pre-0.18 sklearn API; on sklearn >= 0.18,
# import StratifiedShuffleSplit and cross_val_score from
# sklearn.model_selection instead (the iterator API differs slightly)
from sklearn.cross_validation import StratifiedShuffleSplit
from sklearn.cross_validation import cross_val_score
from sklearn.ensemble import AdaBoostClassifier
from sklearn.metrics import roc_curve, auc
from sklearn.utils import shuffle
from scipy import interp
%matplotlib inline
In [296]:
# Get the positive mode data ~ 80 mb
local_path = '/home/irockafe/Dropbox (MIT)/Alm_Lab/projects/'
project_path = 'revo_healthcare/data/processed/ST000578/ST000578_AN000888_Results.tsv'
data = pd.read_csv(local_path+project_path, sep='\t')
In [297]:
# Parse the class-labels from output
outcome = data.iloc[0,:]
print outcome.unique()
# Select P. vivax samples by chloroquine-resistance phenotype
susceptible_triplicate = outcome[outcome.str.contains('Current Malaria Infection:P.Vivax') &
                                 outcome.str.contains('Chloroquine Resistance:Susceptible')]
resistant_triplicate = outcome[outcome.str.contains('Current Malaria Infection:P.Vivax') &
                               outcome.str.contains('Chloroquine Resistance:Resistant')]
# samples were run in triplicate, so divide by 3 to count subjects
print '\n\nSusceptible', susceptible_triplicate.shape[0] / 3
print 'Resistant', resistant_triplicate.shape[0] / 3
# Keep one sample per triplicate: pandas appends '.1', '.2' to duplicate
# column names, so labels without a '.' are the first replicate
resistant = resistant_triplicate[~resistant_triplicate.index.str.contains('\.')]
susceptible = susceptible_triplicate[~susceptible_triplicate.index.str.contains('\.')]
print 'Resistant', resistant.values
# Relabel so that there are only two classes
resistant[:] = 'Chloroquine resistant'
susceptible[:] = 'Chloroquine susceptible'
print '\n\n Resistant', resistant
print '\n\n susceptible', susceptible
class_labels = pd.concat([resistant, susceptible])
print class_labels
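Class balance matters for the stratified cross-validation used later; a one-line check (a sanity check, not part of the pipeline):
In [ ]:
# counts per class; StratifiedShuffleSplit below preserves these proportions
print class_labels.value_counts()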
In [298]:
# Check other subsets of data
susceptible_triplicate = outcome[outcome.str.contains('Current Malaria Infection:P.Vivax') &
                                 outcome.str.contains('Chloroquine Resistance:Susceptible')]
resistant_triplicate = outcome[outcome.str.contains('Current Malaria Infection:P.Vivax') &
                               outcome.str.contains('Chloroquine Resistance:Resistant')]
print susceptible_triplicate.unique()
print "Number susceptible", susceptible_triplicate.shape[0] / 3, '\n'
print resistant_triplicate.unique()
print "number resistant", resistant_triplicate.shape[0] /3
print 'num resistant, prior malaria NO', resistant_triplicate.str.contains('NO').sum() / 3
print 'num resistant, prior malaria N/A', resistant_triplicate.str.contains('N\/A').sum() / 3
print resistant_triplicate.values
# 31 resistant
# 15 with n/a
# 15 with prior malaria
# 1 without prior malaria
In [299]:
def remove_zero_columns(X):
    '''
    Drop features (columns) that are zero in every sample.
    Requires a dataframe.
    '''
    # convert zeros to NaN, drop all-NaN columns, then replace leftover NaNs with zeros
    X_non_zero_columns = X.replace(0, np.nan).dropna(how='all', axis=1).replace(np.nan, 0)
    return X_non_zero_columns

def zero_fill_half_min(X, threshold=1e-20):
    '''
    Fill zeros with half the minimum nonzero value of their column.
    Requires a dataframe; only zero entries are changed.
    '''
    # Get a vector of 1/2 the minimum nonzero value of each column
    half_min = X[X > threshold].min(axis=0) * 0.5
    # Build a fill table: half_min where X is (near-)zero, 0 elsewhere
    fill_vals = (X[X < threshold] + half_min).fillna(value=0)
    # Add the original dataframe to the table of fill values
    X_zeros_filled = X + fill_vals
    return X_zeros_filled
toy = pd.DataFrame([[1,2,3,0],
[0,0,0,0],
[0.5,1,0,0]], dtype=float)
toy_no_zeros = remove_zero_columns(toy)
toy_filled_zeros = zero_fill_half_min(toy_no_zeros)
print toy
print toy_no_zeros
print toy_filled_zeros
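A quick spot check of the toy example above, with the expected values worked out by hand (column 0 has nonzero minimum 0.5, so its zeros should be filled with 0.25):
In [ ]:
# zeros become half the column's nonzero minimum; nonzero entries are untouched
assert toy_filled_zeros.iloc[1, 0] == 0.25
assert toy_filled_zeros.iloc[0, 0] == 1.0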
In [336]:
# Grab samples that have correct class labels
df_raw = data[class_labels.index]
print df_raw.head()
# remove first column and convert to float
df_raw = df_raw.iloc[1:,:].astype('float64')
print "df_raw shape", df_raw.shape
print "class labels", class_labels.shape
# Make sure labels and df_raw-columns are in correct order
print "quick eyeball that y and X are in same order ", zip(df_raw.columns, class_labels.index)[0:5]
assert (df_raw.columns == class_labels.index).all()
print df_raw.head()
# Convert to binary class labels
print class_labels.unique()
le = preprocessing.LabelEncoder()
le.fit(class_labels)
y = le.transform(class_labels)
print y
# Convert to numpy array
#X_raw = df_nonzero.as_matrix().T
In [301]:
# TODO: PQN normalization, log-transformation,
# and some feature selection (intensity threshold, principal components), etc.

def pqn_normalize(X, integral_first=False, plot=False):
    '''
    Take a (samples x features) table and run probabilistic quotient
    normalization (PQN) on it
    '''
    # normalize by sum of intensities in each sample first. Not necessary
    if integral_first:
        sample_sums = np.sum(X, axis=1)
        X = (X / sample_sums[:, np.newaxis])
    # Get the median value of each feature across all samples
    median_intensities = np.median(X, axis=0)
    print 'median intensity shape', median_intensities.shape
    print 'median intensity', median_intensities[0:10]
    # Divide each feature by its across-sample median -
    # these are the quotients for each feature
    X_quotients = (X / median_intensities[np.newaxis, :])
    if plot:  # plot the distribution of quotients from each sample
        for i in range(1, len(X_quotients[:, 1])):
            all_quotients = X_quotients[i, :]
            print all_quotients.shape
            sns.violinplot(all_quotients)
            plt.title("median val: %f\nMax val=%f" % (np.median(all_quotients),
                                                      np.max(all_quotients)))
            plt.xlim([-0.5, 5])
            plt.show()
    # Define a quotient for each sample as the median of the
    # feature-specific quotients in that sample
    sample_quotients = np.median(X_quotients, axis=1)
    # Quotient-normalize each sample
    X_pqn = X / sample_quotients[:, np.newaxis]
    return X_pqn
# Make fake data: two pairs of samples, the second of each pair at
# 2x the intensity of the first (simulating a dilution difference)
X_toy = np.array([[1, 1, 1],
                  [2, 2, 2],
                  [3, 6, 9],
                  [6, 12, 18]], dtype=float)
print X_toy
print X_toy.reshape(1, np.prod(X_toy.shape))
X_toy_pqn_int = pqn_normalize(X_toy, integral_first=True)
print X_toy_pqn_int
print '\n\n\n'
X_toy_pqn = pqn_normalize(X_toy)
print X_toy_pqn
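The preprocessing pieces defined so far can be chained into a single pass. A minimal sketch, assuming df_raw from the earlier cell; the final np.log step is the log-transformation mentioned in the TODO above, safe here because half-minimum filling removed all zeros:
In [ ]:
# sketch: zero-removal -> half-min fill -> PQN -> log transform
df_pre = remove_zero_columns(df_raw.T)
df_pre = zero_fill_half_min(df_pre)
X_pqn_pipeline = pqn_normalize(df_pre.as_matrix())
X_log = np.log(X_pqn_pipeline)
print X_log.shape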
In [302]:
def prevalence_threshold(X, threshold=0.5):
    '''
    input: numpy matrix (samples x features)
    output: matrix with the feature columns removed whose
    presence/absence fraction falls below the threshold
    '''
    samples_present = (X > 1e-20).sum(axis=0)
    percent = np.divide(samples_present, float(X.shape[0]))
    print percent
    above_threshold = percent > threshold
    print above_threshold
    # only keep columns above the prevalence threshold
    output = X[:, above_threshold]
    return output

test = np.array([[1, 2, 3], [0, 0, 3], [0, 2, 3]])
print 'Input\n', test
prevalence_threshold(test)
Out[302]:
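For the test matrix above the column prevalences are 1/3, 2/3, and 1, so a 0.5 cutoff should keep only the last two columns; a quick check of that expectation:
In [ ]:
# features present in half the samples or fewer (the first column) are dropped
expected = np.array([[2, 3], [0, 3], [2, 3]])
assert (prevalence_threshold(test) == expected).all()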
In [303]:
# preprocess feature table
# remove zero features
df_nonzero = remove_zero_columns(df_raw.T)
print 'Zero full shape', df_raw.T.shape
print 'Zeros removed shape', df_nonzero.shape
X_nonzero = df_nonzero.as_matrix()
print X_nonzero.shape
# standardize input
X_scaled = preprocessing.scale(X_nonzero)
print 'scaled mean', X_scaled.mean(axis=0)
print 'scaled std', X_scaled.std(axis=0)
print X_scaled[0:4]
# require feature to be present in at least 50% of samples
X_50percent = prevalence_threshold(X_nonzero, threshold=0.5)
print "50% prevalence cutoff", X_50percent.shape
X_80 = prevalence_threshold(X_nonzero, threshold=0.8)
print "80% prevalence cutoff", X_80.shape
# PQN normalize?
#print X_nonzero[0:5]
#X_pqn = pqn_normalize(X_nonzero)
#print X_pqn
# Final decision to use
In [133]:
def roc_curve_cv(X, y, clf, cross_val,
                 path='/home/irockafe/Desktop/roc.pdf',
                 save=False, plot=True):
    t1 = time.time()
    # collect vals for the ROC curves
    tpr_list = []
    mean_fpr = np.linspace(0, 1, 100)
    auc_list = []
    # Get the false-positive and true-positive rate for each CV split
    for i, (train, test) in enumerate(cross_val):
        clf.fit(X[train], y[train])
        y_pred = clf.predict_proba(X[test])[:, 1]
        # get fpr, tpr
        fpr, tpr, thresholds = roc_curve(y[test], y_pred)
        roc_auc = auc(fpr, tpr)
        # interpolate tpr onto a common fpr grid so the curves can be averaged
        tpr_list.append(interp(mean_fpr, fpr, tpr))
        tpr_list[-1][0] = 0.0
        auc_list.append(roc_auc)
        if (i % 10 == 0):
            print '{perc}% done! {time}s elapsed'.format(
                perc=100 * float(i) / cross_val.n_iter,
                time=(time.time() - t1))
    # get mean tpr and fpr
    mean_tpr = np.mean(tpr_list, axis=0)
    # make sure it ends up at 1.0
    mean_tpr[-1] = 1.0
    mean_auc = auc(mean_fpr, mean_tpr)
    std_auc = np.std(auc_list)
    if plot:
        # plot mean ROC curve
        plt.plot(mean_fpr, mean_tpr,
                 label='Mean ROC - AUC = %0.2f $\pm$ %0.2f' % (mean_auc, std_auc),
                 lw=5, color='b')
        # plot luck-line
        plt.plot([0, 1], [0, 1], linestyle='--', lw=2, color='r',
                 label='Luck', alpha=0.5)
        # plot a +/- 1 standard deviation band around the mean tpr
        std_tpr = np.std(tpr_list, axis=0)
        tprs_upper = np.minimum(mean_tpr + std_tpr, 1)
        tprs_lower = np.maximum(mean_tpr - std_tpr, 0)
        plt.fill_between(mean_fpr, tprs_lower, tprs_upper, color='grey', alpha=0.2,
                         label=r'$\pm$ 1 stdev')
        plt.xlim([-0.05, 1.05])
        plt.ylim([-0.05, 1.05])
        plt.xlabel('False Positive Rate')
        plt.ylabel('True Positive Rate')
        plt.title('ROC curve, {iters} iterations of {cv} cross validation'.format(
            iters=cross_val.n_iter,
            cv='{train}:{test}'.format(test=cross_val.test_size,
                                       train=(1 - cross_val.test_size))))
        plt.legend(loc="lower right")
        if save:
            plt.savefig(path, format='pdf')
        plt.show()
    return tpr_list, auc_list, mean_fpr
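Before pointing roc_curve_cv at the real data, a cheap sanity check on synthetic data (a sketch; the make_classification parameters are arbitrary). The permuted-label control should land near AUC 0.5, and also exercises the so-far-unused shuffle import:
In [ ]:
from sklearn.datasets import make_classification
# labels with real signal: expect a mean AUC well above 0.5
X_syn, y_syn = make_classification(n_samples=60, n_features=50,
                                   n_informative=5, random_state=1)
cv_syn = StratifiedShuffleSplit(y_syn, n_iter=10, test_size=0.3, random_state=1)
clf_syn = RandomForestClassifier(n_estimators=100, random_state=1)
_, auc_syn, _ = roc_curve_cv(X_syn, y_syn, clf_syn, cv_syn, plot=False)
print 'informative labels, mean AUC:', np.mean(auc_syn)
# negative control: permuted labels should give AUC ~ 0.5
y_null = shuffle(y_syn, random_state=1)
cv_null = StratifiedShuffleSplit(y_null, n_iter=10, test_size=0.3, random_state=1)
_, auc_null, _ = roc_curve_cv(X_syn, y_null, clf_syn, cv_null, plot=False)
print 'permuted labels, mean AUC:', np.mean(auc_null)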
In [305]:
rf_estimators = 500
n_iter = 25
test_size = 0.3
random_state = 1
cross_val_rf = StratifiedShuffleSplit(y, n_iter=n_iter, test_size=test_size, random_state=random_state)
clf_rf = RandomForestClassifier(n_estimators=rf_estimators, random_state=random_state)
print cross_val_rf.n_iter
print cross_val_rf.test_size
# unscaled
tpr_vals, auc_vals, mean_fpr = roc_curve_cv(X_nonzero, y, clf_rf, cross_val_rf,
save=False)
In [304]:
# what about with a bit of feature pruning?
rf_estimators = 2000
n_iter = 50
test_size = 0.3
random_state = 1
cross_val_rf = StratifiedShuffleSplit(y, n_iter=n_iter, test_size=test_size, random_state=random_state)
clf_rf = RandomForestClassifier(n_estimators=rf_estimators, random_state=random_state)
print cross_val_rf.n_iter
print cross_val_rf.test_size
# unscaled
tpr_vals, auc_vals, mean_fpr = roc_curve_cv(X_50percent, y, clf_rf, cross_val_rf,
save=False)
In [193]:
# what about with a bit of feature pruning?
rf_estimators = 1000
n_iter = 25
test_size = 0.3
random_state = 1
cross_val_rf = StratifiedShuffleSplit(y, n_iter=n_iter, test_size=test_size, random_state=random_state)
clf_rf = RandomForestClassifier(n_estimators=rf_estimators, random_state=random_state)
print cross_val_rf.n_iter
print cross_val_rf.test_size
# unscaled
tpr_vals, auc_vals, mean_fpr = roc_curve_cv(X_80, y, clf_rf, cross_val_rf,
save=False)
In [267]:
# Drop the class-label row and index the table by its feature (mz_rt) column
data = data.iloc[1:, :]
data = data.set_index(data.iloc[:, 0])

def prevalence_threshold(df, threshold=0.5):
    '''
    features x samples dataframe: keep only features present (nonzero)
    in more than `threshold` fraction of samples.
    NOTE: shadows the numpy version of the same name defined earlier.
    '''
    output = df[((df > 1e-20).sum(axis=1) / df.shape[1]) > threshold]
    return output
df_raw = data[class_labels.index]
#print df_raw.head
df_50percent_prevalence = prevalence_threshold(df_raw, threshold=0.5)
print df_50percent_prevalence.index
In [251]:
mz_rt_df = pd.DataFrame([i.split('_') for i in df_50percent_prevalence.index],
columns=['mz', 'rt'], dtype='float64')
plt.scatter(x=mz_rt_df['rt'], y=mz_rt_df['mz'], s=2)
plt.xlabel('rt')
plt.ylabel('mz')
plt.show()
In [334]:
# Select features with retention time between 350 and 475
df_slice = mz_rt_df[(mz_rt_df['rt'] > 350) & (mz_rt_df['rt'] < 475)]
print mz_rt_df.shape
print df_slice.shape
# convert df_slice into X and run rf on it
print 'df-slice', df_slice[-8:-1]
print df_slice.shape
print 'shape of x-50', X_nonzero.shape
In [338]:
# Re-load the positive mode data (~80 MB); 'data' was re-indexed above
local_path = '/home/irockafe/Dropbox (MIT)/Alm_Lab/projects/'
project_path = 'revo_healthcare/data/processed/ST000578/ST000578_AN000888_Results.tsv'
data = pd.read_csv(local_path+project_path, sep='\t')
In [344]:
mz_rt_df = pd.DataFrame([i.split('_') for i in df_50percent_prevalence.index],
columns=['mz', 'rt'], dtype='float64',
index=df_50percent_prevalence.index)
df_50percent_prevalence_mzrt = pd.concat([mz_rt_df, df_50percent_prevalence],
axis=1)
df_50percent_prevalence_mzrt
Out[344]:
In [354]:
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.ticker import NullFormatter
def get_rt_slice(df, rt_bounds):
    '''
    PURPOSE:
        Given a tidy feature table with 'mz' and 'rt' column headers,
        retain only the features whose rt falls between rt_bounds[0]
        and rt_bounds[1]
    INPUT:
        df - a tidy pandas dataframe with 'mz' and 'rt' column headers
        rt_bounds - (left, right) boundaries of the rt slice, in seconds
    '''
    out_df = df.loc[(df['rt'] > rt_bounds[0]) &
                    (df['rt'] < rt_bounds[1])]
    return out_df
def plot_mz_rt(df, rt_bounds, path='/home/irockafe/Desktop/mz_rt.pdf'):
    # pull the rt (x) and mz (y) coordinates
    x = df['rt']
    y = df['mz']
    print np.max(x)
    print np.max(y)
    nullfmt = NullFormatter()  # no labels
    # definitions for the axes
    left, width = 0.1, 0.65
    bottom, height = 0.1, 0.65
    bottom_h = left_h = left + width + 0.02
    rect_scatter = [left, bottom, width, height]
    rect_histx = [left, bottom_h, width, 0.2]
    rect_histy = [left_h, bottom, 0.2, height]
    # start with a square Figure
    fig = plt.figure(1, figsize=(10, 10))
    axScatter = plt.axes(rect_scatter)
    axHistx = plt.axes(rect_histx)
    axHisty = plt.axes(rect_histy)
    # no tick labels on the marginal histograms
    axHistx.xaxis.set_major_formatter(nullfmt)
    axHisty.yaxis.set_major_formatter(nullfmt)
    # the scatter plot:
    axScatter.scatter(x, y, s=1)
    # set limits with a 50-unit margin around the data
    x_min = np.min(x) - 50
    x_max = np.max(x) + 50
    axScatter.set_xlim(x_min, x_max)
    y_min = np.min(y) - 50
    y_max = np.max(y) + 50
    axScatter.set_ylim(y_min, y_max)
    print 'ymin: ', y_min
    # Add vertical lines marking the rt slice on scatter and rt histogram
    axScatter.axvline(x=rt_bounds[0], lw=2, color='r', alpha=0.5)
    axScatter.axvline(x=rt_bounds[1], lw=2, color='r', alpha=0.5)
    axHistx.axvline(x=rt_bounds[0], lw=2, color='r', alpha=0.5)
    axHistx.axvline(x=rt_bounds[1], lw=2, color='r', alpha=0.5)
    bins = 100
    axHistx.hist(x, bins=bins)
    axHisty.hist(y, bins=bins, orientation='horizontal')
    axHistx.set_xlim(axScatter.get_xlim())
    axHisty.set_ylim(axScatter.get_ylim())
    axScatter.set_ylabel('m/z', fontsize=30)
    axScatter.set_xlabel('Retention Time', fontsize=30)
    axHistx.set_ylabel('# of Features', fontsize=20)
    axHisty.set_xlabel('# of Features', fontsize=20)
    plt.savefig(path, format='pdf')
    plt.show()
rt_slice = (350, 475)
df_50percent_slice = get_rt_slice(df_50percent_prevalence_mzrt, rt_slice)
# plot all features, with the chosen rt slice marked by red lines
plot_mz_rt(df_50percent_prevalence_mzrt, (rt_slice))
In [394]:
# drop the mz/rt columns so the rows of X (samples) align with the labels in y
X_50_slice = df_50percent_slice.drop(['mz', 'rt'], axis=1).T.as_matrix()
print "slice shape", X_50_slice.shape
rf_estimators = 1000
n_iter = 25
test_size = 0.3
random_state = 1
cross_val_rf = StratifiedShuffleSplit(y, n_iter=n_iter, test_size=test_size, random_state=random_state)
clf_rf = RandomForestClassifier(n_estimators=rf_estimators, random_state=random_state)
print cross_val_rf.n_iter
print cross_val_rf.test_size
# unscaled
tpr_vals, auc_vals, mean_fpr = roc_curve_cv(X_50_slice, y, clf_rf, cross_val_rf,
save=False)
Try another slice of retention time.
In [401]:
def slice_and_predict(df, y, rt_slice,
                      rf_estimators=1000,
                      n_iter=10, test_size=0.3, random_state=1):
    df_slice = get_rt_slice(df, rt_slice)
    # plot all features, with the chosen rt slice marked
    plot_mz_rt(df, (rt_slice))
    # remove mz and rt columns so X rows (samples) align with the labels in y
    df_slice_no_mzrt = df_slice.drop(['mz', 'rt'], axis=1)
    X_slice = df_slice_no_mzrt.T.as_matrix()
    print "slice shape", X_slice.shape
    # Run RF with stratified shuffle-split cross-validation
    cross_val_rf = StratifiedShuffleSplit(y, n_iter=n_iter, test_size=test_size,
                                          random_state=random_state)
    clf_rf = RandomForestClassifier(n_estimators=rf_estimators,
                                    random_state=random_state)
    tpr_vals, auc_vals, mean_fpr = roc_curve_cv(X_slice, y, clf_rf, cross_val_rf,
                                                save=False)
    return tpr_vals, auc_vals, mean_fpr
rt_slice = (0,100)
slice_and_predict(df_50percent_prevalence_mzrt, y, rt_slice)
In [388]:
def make_sliding_window(min_val, max_val, width, step):
    '''
    Build (left, right) window bounds of the given width, advancing the
    left edge by `step`. Step must be <= width so the windows tile the
    range without gaps.
    '''
    if step > width:
        raise ValueError("Your step should be less than "
                         "or equal to the width of the window")
    left_bound = np.arange(min_val, max_val, step)
    right_bound = left_bound + width
    # keep only the windows that fit inside [min_val, max_val]
    rt_bounds = [(left, right) for left, right in zip(left_bound, right_bound)
                 if right <= max_val]
    return rt_bounds

make_sliding_window(0, 100, 10, 5)
Out[388]:
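With step 5 and width 10 on [0, 100], consecutive windows overlap by half and the last window kept is (90, 100), 19 windows in all; a quick check:
In [ ]:
windows = make_sliding_window(0, 100, 10, 5)
assert len(windows) == 19
assert windows[-1] == (90, 100)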
In [396]:
# Make sliding window
min_val = 0
max_val = df_50percent_prevalence_mzrt['rt'].max()
width = max_val / 5.0
step = width / 2
print "min: {mini}, max: {maxi}, width: {width}, step: {step}".format(
mini=min_val, maxi=max_val, width=width, step=step)
sliding_window = make_sliding_window(min_val, max_val, width, step)
# plot and run classifier on sliding window
for rt_slice in sliding_window:
    print rt_slice
    slice_and_predict(df_50percent_prevalence_mzrt, y, rt_slice,
                      rf_estimators=1000,
                      n_iter=25, test_size=0.3, random_state=1)
    print '\n\n\n' + '-'*50 + 'NEXT ROUND' + '-'*50 + '\n\n\n'
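The loop above only prints per-window ROC results. A follow-up sketch (assuming the variables defined above; the classifier settings here are arbitrary) that collects the mean AUC per window and plots it against the window center, to make any retention-time dependence easy to scan:
In [ ]:
window_centers = []
window_aucs = []
for rt_slice in sliding_window:
    df_slice = get_rt_slice(df_50percent_prevalence_mzrt, rt_slice)
    X_slice = df_slice.drop(['mz', 'rt'], axis=1).T.as_matrix()
    cv = StratifiedShuffleSplit(y, n_iter=10, test_size=0.3, random_state=1)
    clf = RandomForestClassifier(n_estimators=500, random_state=1)
    _, auc_vals, _ = roc_curve_cv(X_slice, y, clf, cv, plot=False)
    window_centers.append(np.mean(rt_slice))
    window_aucs.append(np.mean(auc_vals))
plt.plot(window_centers, window_aucs, 'o-')
plt.xlabel('Retention time (window center)')
plt.ylabel('Mean AUC across CV iterations')
plt.show()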