In [1]:
from sklearn import svm
import pandas as pd
from sklearn.ensemble import ExtraTreesClassifier
from sklearn.cross_validation import cross_val_score
from sklearn.preprocessing import Imputer

In [2]:
# Load the training set; '?' entries in the CSV are parsed as missing values (NaN).
training_csv = "../data/training-weka.csv"
train_df = pd.read_csv(training_csv, sep=',', na_values=['?'])

# Last expression of the cell: display the column names as a plain list.
train_df.columns.tolist()


Out[2]:
['EventId',
 'DER_mass_MMC',
 'DER_mass_transverse_met_lep',
 'DER_mass_vis',
 'DER_pt_h',
 'DER_deltaeta_jet_jet',
 'DER_mass_jet_jet',
 'DER_prodeta_jet_jet',
 'DER_deltar_tau_lep',
 'DER_pt_tot',
 'DER_sum_pt',
 'DER_pt_ratio_lep_tau',
 'DER_met_phi_centrality',
 'DER_lep_eta_centrality',
 'PRI_tau_pt',
 'PRI_tau_eta',
 'PRI_tau_phi',
 'PRI_lep_pt',
 'PRI_lep_eta',
 'PRI_lep_phi',
 'PRI_met',
 'PRI_met_phi',
 'PRI_met_sumet',
 'PRI_jet_num',
 'PRI_jet_leading_pt',
 'PRI_jet_leading_eta',
 'PRI_jet_leading_phi',
 'PRI_jet_subleading_pt',
 'PRI_jet_subleading_eta',
 'PRI_jet_subleading_phi',
 'PRI_jet_all_pt',
 'Weight',
 'Label']

In [ ]:
# Cross-validate an ExtraTreesClassifier on the training features.

# min_samples_split must be an integer >= 2 in current scikit-learn (1 raises a
# ValueError). A node needs at least two samples to be split at all, so 2 yields
# the same fully-grown trees that the former value of 1 produced on old versions.
et = ExtraTreesClassifier(n_estimators=100, max_depth=None, min_samples_split=2, random_state=0)

columns = list(train_df.columns.values)

# Columns that are not model inputs: the row identifier, the per-event weight and
# the target. Selecting features by name rather than by a positional slice keeps
# the selection correct if the column order of the CSV ever changes; for the
# layout shown above it yields the same 30 columns as columns[1:31].
non_feature_columns = ("EventId", "Weight", "Label")
feature_columns = [name for name in columns if name not in non_feature_columns]

labels = train_df["Label"].values
features = train_df[feature_columns].values

# Replace every NaN (the '?' placeholders from the CSV) with the mean of its column.
imp = Imputer(missing_values='NaN', strategy='mean', axis=0, copy=False)
features = imp.fit_transform(features)

# Average the per-fold scores; n_jobs=-1 lets scikit-learn evaluate folds in parallel.
et_score = cross_val_score(et, features, labels, n_jobs=-1).mean()

# Report the features that were actually used, next to the mean CV score.
print("{0} -> ET: {1}".format(feature_columns, et_score))

In [ ]: