Primero pimpea tu libreta!



In [2]:

    
from IPython.core.display import HTML
import os
def css_styling():
    """Load default custom.css file from ipython profile"""
    base = os.getcwd()
    styles = "<style>\n%s\n</style>" % (open(os.path.join(base,'files/custom.css'),'r').read())
    return HTML(styles)
css_styling()









    Out[2]:



In [3]:

    
import pandas as pd
import numpy as np



In [4]:

    
df = pd.read_csv('training.csv')



In [5]:

    
df.head(1)









    Out[5]:






  
    
      
      EventId
      DER_mass_MMC
      DER_mass_transverse_met_lep
      DER_mass_vis
      DER_pt_h
      DER_deltaeta_jet_jet
      DER_mass_jet_jet
      DER_prodeta_jet_jet
      DER_deltar_tau_lep
      DER_pt_tot
      ...
      PRI_jet_num
      PRI_jet_leading_pt
      PRI_jet_leading_eta
      PRI_jet_leading_phi
      PRI_jet_subleading_pt
      PRI_jet_subleading_eta
      PRI_jet_subleading_phi
      PRI_jet_all_pt
      Weight
      Label
    
  
  
    
      0
      100000
      138.47
      51.655
      97.827
      27.98
      0.91
      124.711
      2.666
      3.064
      41.928
      ...
      2
      67.435
      2.15
      0.444
      46.062
      1.24
      -2.475
      113.497
      0.002653
      s
    
  

1 rows × 33 columns



In [6]:

    
bueno=df['Label'].replace(to_replace=['s','b'],value=[1,0])
df['class_int']= bueno



In [7]:

    
df.drop('EventId',axis=1,inplace=True)
df.drop('Label',axis=1,inplace=True)
df.drop('class_int',axis=1,inplace=True)



In [8]:

    
X = df.values
Y = bueno



In [9]:

    
print(X.shape)
print(Y.shape)









    



(250000, 31)
(250000,)



In [11]:

    
X_train,X_test, Y_train, Y_test= train_test_split(X,Y,test_size=0.95)









    



---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-11-c481711203cc> in <module>()
----> 1 X_train,X_test, Y_train, Y_test= train_test_split(X,Y,test_size=0.95)

NameError: name 'train_test_split' is not defined



In [10]:

    
print(X_train.shape)
print(Y_train.shape)









    



---------------------------------------------------------------------------
NameError                                 Traceback (most recent call last)
<ipython-input-10-d60f5268cef7> in <module>()
----> 1 print(X_train.shape)
      2 print(Y_train.shape)

NameError: name 'X_train' is not defined

experimentar con

max_features
max_depth
min_samples_leaf
n_estimators



In [ ]:

    
from sklearn.ensemble import RandomForestClassifier

clf = RandomForestClassifier(
    max_features=3,
    max_depth=10,
    min_samples_leaf=50,
    n_estimators=5
)
clf.fit(X_train,Y_train)
clf.score(X_test,Y_test)



In [1]:

    
from sklearn.ensemble import GradientBoostingClassifier as GBC



In [ ]: