In [2]:
%matplotlib inline
from matplotlib import pyplot as plt
import seaborn
import xgboost
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve

In [ ]:
import time
from tqdm import tqdm_notebook
for i in tqdm_notebook(range(1000)):
    time.sleep(0.1)



In [33]:
X, Y = datasets.make_classification(10000, 30)
pd.DataFrame(X).head(5)


Out[33]:
0 1 2 3 4 5 6 7 8 9 ... 20 21 22 23 24 25 26 27 28 29
0 0.301581 1.408314 1.409227 -0.173609 -1.703800 1.663592 -0.328853 0.546714 -0.853095 -0.475903 ... 0.795107 0.877714 1.200536 -0.533912 1.020690 0.940796 -0.700787 1.470776 2.112411 -0.090783
1 -0.487323 -0.484605 -0.419753 1.258002 0.423214 -0.724253 -1.232197 2.445035 -1.378338 -1.232679 ... -0.772201 -1.028194 -0.035316 0.278069 -2.562959 -0.282449 -2.042751 -0.023760 -0.782766 -1.322814
2 -0.554187 -1.097552 -0.697864 -1.132140 0.325907 0.193525 -0.133474 -0.413987 -0.287095 -1.145795 ... -1.022807 -0.136579 -0.009012 -0.757375 0.826511 0.551015 0.439954 -0.551893 -0.062991 -0.235474
3 0.383414 0.542371 0.208094 -0.077591 0.181180 1.113206 -0.515131 -0.327328 -0.037342 0.818765 ... 0.029210 0.033616 0.744697 -0.222526 0.535837 -0.433155 -0.279932 -1.056352 -0.083518 -1.862261
4 0.929047 1.468983 1.346298 -0.342393 -1.467817 -1.218745 0.524839 1.758517 -1.354053 -1.627166 ... 0.693371 0.562292 1.653235 1.092434 0.187117 0.395255 -0.270094 -0.246970 -0.649252 0.851364

5 rows × 30 columns


In [34]:
X_train, X_test, y_train, y_test = train_test_split(X, Y)
clsf = xgboost.XGBClassifier(n_estimators=1000)
clsf.fit(X_train, y_train)
prediction = clsf.predict_proba(X_test)

In [36]:
fpr, tpr, _ = roc_curve(y_test, prediction[:,1])
plt.plot(fpr, tpr)


Out[36]:
[<matplotlib.lines.Line2D at 0x7ff5ef1f8b50>]