In [1]:
# Project-local helper that runs a grid search fold-by-fold and persists
# partial results to disk (so an interrupted search can be resumed).
from _search import GranularGridSearchCVSave
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
# NOTE(review): sklearn.grid_search is deprecated since 0.18 and removed in
# 0.20 (see the DeprecationWarning below); the replacement is
# sklearn.model_selection.GridSearchCV.
from sklearn.grid_search import GridSearchCV
from xgboost.sklearn import XGBClassifier
from sklearn.ensemble import RandomForestClassifier


C:\Users\Evgeny\Anaconda2\lib\site-packages\sklearn\cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.
  "This module will be removed in 0.20.", DeprecationWarning)
C:\Users\Evgeny\Anaconda2\lib\site-packages\sklearn\grid_search.py:43: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. This module will be removed in 0.20.
  DeprecationWarning)

In [2]:
# Load the train/test feature matrices (semicolon-separated CSVs) and the
# target vector. y_train.csv has no header row, hence header=None; np.ravel
# flattens the single-column frame into a 1-D array as sklearn expects.
train = pd.read_csv('x_train.csv', delimiter=';')
y_raw = pd.read_csv('y_train.csv', header=None)
y = np.ravel(y_raw)
test = pd.read_csv('x_test.csv', delimiter=';')

In [ ]:
# Hyperparameter grid for the random forest. Only true model hyperparameters
# belong here; n_jobs is a parallelism setting, not something to tune, so it
# is passed to the estimator constructor instead (as a scalar it is also not
# a valid grid value for sklearn-style searches, which expect lists).
param_test = {
 'n_estimators': [300, 400],
 'criterion': ['gini', 'entropy'],
}
gs = GranularGridSearchCVSave(train, y)
# Scored with negative log-loss (higher, i.e. closer to 0, is better);
# verbose=True prints each parameter combination's CV result as it finishes.
gs.fit_and_save(RandomForestClassifier(n_jobs=-1), param_test, 'neg_log_loss',
                verbose=True)


mean:-0.49216	std:0.04124	cv:  5	params:{'n_estimators': 300, 'n_jobs': -1, 'criterion': 'gini'}
mean:-0.49551	std:0.03220	cv:  5	params:{'n_estimators': 300, 'n_jobs': -1, 'criterion': 'entropy'}
mean:-0.48247	std:0.03528	cv:  5	params:{'n_estimators': 400, 'n_jobs': -1, 'criterion': 'gini'}

In [4]:
# Reload the persisted CV scores for this estimator class from disk.
# Presumably returns (mean_score, std_score, fitted_params_estimator) tuples
# sorted best-first, as shown in the output below — TODO confirm in _search.
results = gs.get_score_from_file(RandomForestClassifier)

In [5]:
# Display each (mean, std, estimator) result tuple. Function-call form of
# print: identical output in Python 2 for a single argument, and forward
# compatible with Python 3 (the statement form `print result` is Py2-only).
for result in results:
    print(result)


(-0.48461530837099998, 0.033721108571200004, RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=400, n_jobs=-1, oob_score=False,
            random_state=None, verbose=0, warm_start=False))
(-0.48553580880800001, 0.039948909167800001, RandomForestClassifier(bootstrap=True, class_weight=None, criterion='entropy',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=400, n_jobs=-1, oob_score=False,
            random_state=None, verbose=0, warm_start=False))
(-0.48743403624600001, 0.0315789316312, RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=300, n_jobs=-1, oob_score=False,
            random_state=None, verbose=0, warm_start=False))
(-0.50186185872800004, 0.029969451211200003, RandomForestClassifier(bootstrap=True, class_weight=None, criterion='entropy',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=300, n_jobs=-1, oob_score=False,
            random_state=None, verbose=0, warm_start=False))

In [ ]: