In [6]:
# multiclass classification
import pandas
#import numpy
import xgboost
from sklearn import model_selection,cross_validation
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import LabelEncoder
%matplotlib inline
import matplotlib.pyplot as plt
# load data
data = pandas.read_csv('iris.csv', header=1)
dataset = data.values
# split data into X and y
X = dataset[:,0:4]
Y = dataset[:,4]
# encode string class values as integers
label_encoder = LabelEncoder()
label_encoder = label_encoder.fit(Y)
label_encoded_y = label_encoder.transform(Y)
seed = 7
test_size = 0.33
X_train, X_test, y_train, y_test = cross_validation.train_test_split(X, label_encoded_y, test_size=test_size, random_state=seed)
# fit model no training data
model = xgboost.XGBClassifier()
model.fit(X_train, y_train)
print(model)
# make predictions for test data
y_pred = model.predict(X_test)
predictions = [round(value) for value in y_pred]
# evaluate predictions
accuracy = accuracy_score(y_test, predictions)
print("Accuracy: %.2f%%" % (accuracy * 100.0))
In [12]:
# Feature-importance chart for the fitted classifier.
# `model` comes from the training cell, so that cell has to run first.
fig, ax = plt.subplots(nrows=1, ncols=1)  # default size; pass figsize=(12, 18) to enlarge
# Passing max_num_features=50 would cap the chart at the top 50 features;
# without it every feature is drawn.
xgboost.plot_importance(model, ax=ax, height=0.8)
plt.show()