Tutorial: how to use XGBoost


In [13]:
import xgboost as xgb
# NOTE(review): load_boston was removed in scikit-learn >= 1.2 (ethical concerns
# with the dataset); on modern versions use fetch_california_housing instead.
from sklearn.datasets import load_boston
# sklearn.cross_validation was deprecated in 0.18 and removed in 0.20;
# train_test_split now lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score, auc

In [11]:
# Load the Boston housing dataset (a sklearn Bunch: .data, .target, .DESCR, ...).
boston = load_boston()
#print(boston.DESCR)  # uncomment to see the full dataset description

In [3]:
# Show the dataset dimensions: (number of samples, number of features).
n_samples, n_features = boston.data.shape
print((n_samples, n_features))


(506, 13)

In [4]:
# Hold out a test set (default 25%); fix random_state so the split — and the
# printed R^2 below — is reproducible on Restart Kernel -> Run All.
X_train, X_test, y_train, y_test = train_test_split(
    boston.data, boston.target, random_state=42
)

# Gradient-boosted regression trees with default hyperparameters.
model = xgb.XGBRegressor()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Coefficient of determination on the held-out set (1.0 = perfect prediction).
print( r2_score(y_test, y_pred) )


0.912696331838

Let's do the same for a classification problem.

Tip: use the iris dataset for this, f1_score to measure quality, and xgb.XGBClassifier()


In [25]:
# Exercise: you should import load_iris (hint: it lives in sklearn.datasets)
# Exercise: you should import f1_score (hint: it lives in sklearn.metrics)

iris = load_iris()  # NOTE: this cell fails until load_iris is imported above

Visualisation


In [23]:
import seaborn as sns
import matplotlib.pyplot as plt
import pandas as pd
sns.set(style="whitegrid", palette="husl")
%matplotlib inline


iris_melt = pd.melt(iris, "species", var_name="measurement")
f, ax = plt.subplots(1, figsize=(15,9))
sns.stripplot(x="measurement", y="value", hue="species", data=iris_melt, jitter=True, edgecolor="white", ax=ax)


Out[23]:
<matplotlib.axes._subplots.AxesSubplot at 0x1124c1450>

In [26]:
# Exercise: split the iris data, train an XGBoost classifier, and score it.
X_train, X_test, y_train, y_test = train_test_split(iris.data, iris.target)

model = #create a model here (hint: xgb.XGBClassifier())
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# average='micro' computes a global F1 by counting total TP/FP/FN over all classes.
print( f1_score(y_test, y_pred, average='micro') )

In [ ]: