In [6]:
!wget https://archive.ics.uci.edu/ml/machine-learning-databases/pima-indians-diabetes/pima-indians-diabetes.data
In [9]:
ls
In [35]:
from numpy import loadtxt
from xgboost import XGBClassifier, plot_importance
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
%matplotlib inline
In [10]:
# Read the comma-separated data file into a NumPy array.
data_file = 'pima-indians-diabetes.data'
dataset = loadtxt(data_file, delimiter=',')
In [11]:
# The first eight columns are the model inputs; column 8 is the label to predict.
X, Y = dataset[:, :8], dataset[:, 8]
In [14]:
# Sanity-check the dimensions of the input matrix and the label vector.
X.shape, Y.shape
Out[14]:
In [15]:
# Hold out 33% of the rows for evaluation; the fixed seed makes the split repeatable.
seed, test_size = 7, 0.33
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=test_size,
    random_state=seed,
)
In [16]:
# Create the classifier without overriding any constructor arguments.
model = XGBClassifier()
In [17]:
# Echo the estimator so its repr is rendered as the cell output.
model
Out[17]:
In [18]:
# Train the classifier on the training split only.
model.fit(X_train, y_train)
Out[18]:
In [20]:
# Predict labels for the held-out test rows.
y_pred = model.predict(X_test)
In [24]:
# Round every prediction to the nearest integer so it can be compared with the labels.
predictions = list(map(round, y_pred))
In [29]:
# Fraction of test rows whose rounded prediction matches the true label.
accuracy = accuracy_score(y_true=y_test, y_pred=predictions)
In [30]:
# Report the accuracy as a percentage with two decimal places.
print('Accuracy: %.2f%%' % (accuracy * 100.0))
In [31]:
# Print the fitted model's importance score for each input column.
print(model.feature_importances_)
In [37]:
# Plot the fitted model's feature importances.
plot_importance(model)
Out[37]:
In [38]:
# Inspect the estimator's learning_rate setting.
model.learning_rate
Out[38]:
In [40]:
# Exploratory: list every attribute and method name exposed by the estimator.
dir(model)
Out[40]:
In [41]:
# Inspect the estimator's n_estimators setting.
model.n_estimators
Out[41]:
In [43]:
# Inspect n_classes_ on the fitted estimator
# (presumably the number of distinct labels seen during fit — confirm in the xgboost docs).
model.n_classes_
Out[43]:
In [44]:
# Inspect the estimator's subsample setting.
model.subsample
Out[44]:
In [45]:
# Inspect the estimator's max_depth setting.
model.max_depth
Out[45]:
In [49]:
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import StratifiedKFold
In [47]:
# Candidate values for the two hyperparameters being tuned (4 x 4 = 16 combinations).
n_estimators = [50, 100, 150, 200]
max_depth = [2, 4, 6, 8]
param_grid = {
    'max_depth': max_depth,
    'n_estimators': n_estimators,
}
In [48]:
# Echo the grid to confirm its contents.
param_grid
Out[48]:
In [51]:
# Re-derive inputs and labels from the full dataset: the grid search below
# cross-validates on all rows rather than on the earlier train/test split.
X = dataset[:, :8]   # first eight columns: inputs
Y = dataset[:, 8]    # column 8: label
In [52]:
# Exhaustively evaluate every combination in param_grid, scoring each one by
# negative log loss over 10 shuffled, stratified folds (fold seed fixed at 7).
# Note: this rebinds `model` to a fresh, unfitted classifier.
model = XGBClassifier()
kfold = StratifiedKFold(n_splits=10, shuffle=True, random_state=7)
grid_search = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    scoring='neg_log_loss',
    n_jobs=1,
    cv=kfold,
    verbose=1,
)
result = grid_search.fit(X, Y)
In [54]:
# Best mean cross-validated score found by the search.
# The scoring is negative log loss, so values closer to 0 are better.
result.best_score_
Out[54]:
In [55]:
# Hyperparameter combination that achieved the best score.
result.best_params_
Out[55]:
In [56]:
# Mean test score of each evaluated parameter combination.
means = result.cv_results_['mean_test_score']
In [57]:
# Echo the mean scores.
means
Out[57]:
In [58]:
# Standard deviation of the test score of each evaluated parameter combination.
std = result.cv_results_['std_test_score']
In [59]:
# Echo the score standard deviations.
std
Out[59]:
In [60]:
# Parameter dict of each evaluated combination.
params = result.cv_results_['params']
In [61]:
# Echo the evaluated parameter combinations.
params
Out[61]:
In [63]:
# Print one line per evaluated combination: mean score, (score std), and the parameters.
# The loop body must be indented under the `for`; at column 0 the cell raises
# an IndentationError.
for mean, stdev, param in zip(means, std, params):
    print('%f (%f) with: %r' % (mean, stdev, param))
In [ ]: