Primero pimpea tu libreta!


In [1]:
from IPython.core.display import HTML
import os
def css_styling():
    """Load default custom.css file from ipython profile"""
    base = os.getcwd()
    styles = "<style>\n%s\n</style>" % (open(os.path.join(base,'files/custom.css'),'r').read())
    return HTML(styles)
css_styling()


Out[1]:

In [4]:
import pandas as pd
import numpy as np

In [5]:
df = pd.read_csv('training.csv')

In [7]:
df.head(1)


Out[7]:
EventId DER_mass_MMC DER_mass_transverse_met_lep DER_mass_vis DER_pt_h DER_deltaeta_jet_jet DER_mass_jet_jet DER_prodeta_jet_jet DER_deltar_tau_lep DER_pt_tot ... PRI_jet_num PRI_jet_leading_pt PRI_jet_leading_eta PRI_jet_leading_phi PRI_jet_subleading_pt PRI_jet_subleading_eta PRI_jet_subleading_phi PRI_jet_all_pt Weight Label
0 100000 138.47 51.655 97.827 27.98 0.91 124.711 2.666 3.064 41.928 ... 2 67.435 2.15 0.444 46.062 1.24 -2.475 113.497 0.002653 s

1 rows × 33 columns


In [10]:
bueno=df['Label'].replace(to_replace=['s','b'],value=[1,0])
df['class_int']= bueno

In [13]:
df.drop('EventId',axis=1,inplace=True)
df.drop('Label',axis=1,inplace=True)
df.drop('class_int',axis=1,inplace=True)

In [29]:
X = df.values
Y = bueno

In [32]:
print(X.shape)
print(Y.shape)


(250000, 31)
(250000,)

In [33]:


In [34]:
print(X_train.shape)
print(Y_train.shape)


(12500, 31)
(12500,)

experimentar con

  • max_features
  • max_depth
  • min_samples_leaf
  • n_estimators

In [45]:
from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(
    max_features=3,
    max_depth=10,
    min_samples_leaf=50,
    n_estimators=5
)
clf.fit(X_train,Y_train)
clf.score(X_test,Y_test)


Out[45]:
0.93377684210526313