Primero pimpea tu libreta!



In [1]:

    
from IPython.core.display import HTML
import os
def css_styling():
    """Load default custom.css file from ipython profile"""
    base = os.getcwd()
    styles = "<style>\n%s\n</style>" % (open(os.path.join(base,'files/custom.css'),'r').read())
    return HTML(styles)
css_styling()









    Out[1]:



In [4]:

    
import pandas as pd
import numpy as np



In [5]:

    
df = pd.read_csv('training.csv')



In [7]:

    
df.head(1)









    Out[7]:






  
    
      
      EventId
      DER_mass_MMC
      DER_mass_transverse_met_lep
      DER_mass_vis
      DER_pt_h
      DER_deltaeta_jet_jet
      DER_mass_jet_jet
      DER_prodeta_jet_jet
      DER_deltar_tau_lep
      DER_pt_tot
      ...
      PRI_jet_num
      PRI_jet_leading_pt
      PRI_jet_leading_eta
      PRI_jet_leading_phi
      PRI_jet_subleading_pt
      PRI_jet_subleading_eta
      PRI_jet_subleading_phi
      PRI_jet_all_pt
      Weight
      Label
    
  
  
    
      0
      100000
      138.47
      51.655
      97.827
      27.98
      0.91
      124.711
      2.666
      3.064
      41.928
      ...
      2
      67.435
      2.15
      0.444
      46.062
      1.24
      -2.475
      113.497
      0.002653
      s
    
  

1 rows × 33 columns



In [10]:

    
bueno=df['Label'].replace(to_replace=['s','b'],value=[1,0])
df['class_int']= bueno



In [13]:

    
df.drop('EventId',axis=1,inplace=True)
df.drop('Label',axis=1,inplace=True)
df.drop('class_int',axis=1,inplace=True)



In [29]:

    
X = df.values
Y = bueno



In [32]:

    
print(X.shape)
print(Y.shape)









    



(250000, 31)
(250000,)



In [33]:



In [34]:

    
print(X_train.shape)
print(Y_train.shape)









    



(12500, 31)
(12500,)

experimentar con

max_features
max_depth
min_samples_leaf
n_estimators



In [45]:

    
from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(
    max_features=3,
    max_depth=10,
    min_samples_leaf=50,
    n_estimators=5
)
clf.fit(X_train,Y_train)
clf.score(X_test,Y_test)









    Out[45]:





0.93377684210526313