In [10]:
from pandas import read_csv
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
import matplotlib.pyplot as plt
import numpy as np
In [19]:
# Load the training CSV into a pandas DataFrame (path is relative to the notebook's working directory).
data = read_csv('../data/otto/train.csv')
In [20]:
# Show the (rows, columns) dimensions of the loaded frame.
data.shape
Out[20]:
In [21]:
# Preview the first few rows to sanity-check the columns and values.
data.head()
Out[21]:
In [11]:
# List the distinct values found in the `target` column (the class labels to predict).
np.unique(data.target)
Out[11]:
In [22]:
# Convert the DataFrame to a plain NumPy array so columns can be sliced by position.
# NOTE(review): if the frame mixes numeric and string columns, this array will have
# object dtype — confirm downstream code is happy with that.
dataset = data.values
In [25]:
# Split the raw array into features and labels.
# Layout assumed (matches the original 1:94 / 94 indices): column 0 is a row id,
# the last column is the class label, and everything in between is a feature.
# Slicing relative to the ends avoids hard-coding the feature count.
# `dataset` is likely object-dtype because the label column holds strings, so the
# feature block is cast to float32 explicitly instead of relying on the model
# library to coerce an object array on every fit.
X = dataset[:, 1:-1].astype(np.float32)
y = dataset[:, -1]
In [27]:
# Encode the class labels in `y` as integers; the classifier is trained on these codes.
label_encoded_y = LabelEncoder().fit_transform(y)
In [28]:
# Display the encoded label array to verify the transformation.
label_encoded_y
Out[28]:
In [29]:
# Base estimator with default hyperparameters; `n_estimators` is varied later via the parameter grid.
model = XGBClassifier()
In [33]:
# Candidate numbers of boosting rounds: 50, 100, ..., 350 (the stop value 400 is exclusive).
n_estimators = range(50, 400, 50)
print([*n_estimators])
# Grid definition: parameter name -> candidate values to try.
param_grid = {'n_estimators': n_estimators}
In [34]:
# 10-fold stratified cross-validation; shuffling with a fixed seed keeps the folds reproducible.
kfold = StratifiedKFold(
    n_splits=10,
    shuffle=True,
    random_state=7,
)
In [ ]:
# Exhaustive search over `param_grid`, scoring each candidate by negative log loss
# (higher is better) across the folds defined by `kfold`. verbose=2 logs per-fit progress.
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='neg_log_loss',
    cv=kfold,
    verbose=2,
)
# Run the search; fit() returns the fitted search object itself.
grid_result = grid_search.fit(X, label_encoded_y)
In [ ]: