In [15]:
import pandas as pd
import numpy as np
% matplotlib inline
from matplotlib import pyplot as plt
In [16]:
# Location of the affairs CSV.
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs where this exact file exists.
path_to_data = r'E:\Universidade\Projects\DSSA\learning-units\units\16-tuning-hyper-parameters\data\affairs.csv'

# Read the CSV into a DataFrame and preview its first ten rows.
data = pd.read_csv(filepath_or_buffer=path_to_data)
data.head(n=10)
Out[16]:
In [17]:
from sklearn.model_selection import train_test_split, RandomizedSearchCV, GridSearchCV
from sklearn.metrics import classification_report
from sklearn import tree
from scipy.stats import randint
In [18]:
# Reload the raw CSV (so re-running this cell always starts from the original
# text values) and encode the two categorical text columns as 0/1 integers.
data = pd.read_csv(path_to_data).assign(
    child=lambda frame: frame['child'].map({'no': 0, 'yes': 1}).astype(int),
    sex=lambda frame: frame['sex'].map({'male': 0, 'female': 1}).astype(int),
)

# Target vector: the 'nbaffairs' column. Feature matrix: every other column.
y = data['nbaffairs'].values
X = data.drop(labels=['nbaffairs'], axis=1).values

# Preview the encoded frame.
data.head(n=10)
Out[18]:
In [19]:
# Hold out half of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.5,
    random_state=0,
)
In [20]:
# Define the parameter space: every (max_depth, max_features) pair is evaluated.
parameter_space = [{'max_depth': [1, 2, 3, 4, 5], 'max_features': range(1, 8)}]
# Choose the classifier. It is seeded because, whenever max_features is smaller
# than the number of columns, each split considers a random subset of features;
# without a seed the scores and best_params_ change from run to run.
classifier = tree.DecisionTreeClassifier(random_state=0)
# Select grid search with 5-fold cross validation
grid_search = GridSearchCV(classifier, parameter_space, cv=5)
grid_search.fit(X_train, y_train)
Out[20]:
In [21]:
# Hyper-parameter combination that obtained the best mean cross-validated score.
grid_search.best_params_
Out[21]:
In [22]:
# Mean cross-validated test score for every candidate in the grid.
grid_search.cv_results_['mean_test_score']
Out[22]:
In [23]:
# Mean cross-validated score of the best candidate found by the grid search.
grid_search.best_score_
Out[23]:
In [24]:
# Define the parameter space: max_depth is picked from a list, max_features is
# drawn from a discrete uniform distribution over 1..7 (randint excludes the
# upper bound).
parameter_space_dist = {"max_depth": [1, 2, 3, 4, 5], "max_features": randint(1, 8)}
# Choose the classifier (seeded so tree construction is repeatable)
classifier = tree.DecisionTreeClassifier(random_state=0)
# Select randomized search with 5-fold cross validation. Seeding the search
# makes the sampled candidates identical between runs.
# NOTE(review): the space holds only 5 * 7 = 35 distinct combinations, so most
# of the 500 draws repeat earlier candidates; consider a much smaller n_iter.
random_search = RandomizedSearchCV(
    classifier,
    parameter_space_dist,
    cv=5,
    n_iter=500,
    random_state=0,
)
random_search.fit(X_train, y_train)
Out[24]:
In [25]:
# Hyper-parameter combination that obtained the best mean cross-validated score
# among the randomly sampled candidates.
random_search.best_params_
Out[25]:
In [26]:
# Mean cross-validated score of the best candidate found by the randomized search.
random_search.best_score_
Out[26]:
In [41]:
# Fit a depth-limited tree on the training half.
# NOTE(review): max_depth=4 is hard-coded, presumably taken from the search
# results above; confirm it still matches best_params_.
# The tree is seeded because scikit-learn permutes features at every split even
# when all features are considered, so an unseeded tree (and therefore its test
# score and exported graph) can differ between runs.
classifier = tree.DecisionTreeClassifier(max_depth=4, random_state=0)
classifier.fit(X_train, y_train)
Out[41]:
In [42]:
# Mean accuracy of the fitted tree on the held-out test half.
classifier.score(X_test, y_test)
Out[42]:
In [46]:
# Export the fitted tree in Graphviz DOT format.
# The node labels must line up with the columns of X, i.e. every column except
# the target 'nbaffairs'. Passing all of data.columns supplies one name too
# many, which raises a length-mismatch error in current scikit-learn or
# mislabels nodes.
feature_names = data.drop(['nbaffairs'], axis=1).columns.tolist()
tree.export_graphviz(
    classifier,
    feature_names=feature_names,
    filled=True,
    out_file=r'E:\Universidade\Projects\DSSA\learning-units\units\16-tuning-hyper-parameters\tree.dot',
)
In [48]:
# Show the full set of hyper-parameters the classifier was constructed with.
classifier.get_params()
Out[48]:
In [ ]: