notebook.community

Edit and run



In [2]:

    
%matplotlib inline
from matplotlib import pyplot as plt
import seaborn
import xgboost
import pandas as pd
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve



In [ ]:

    
import time
from tqdm import tqdm_notebook
for i in tqdm_notebook(range(1000)):
    time.sleep(0.1)



In [33]:

    
X, Y = datasets.make_classification(10000, 30)
pd.DataFrame(X).head(5)









    Out[33]:






  
    
      
      0
      1
      2
      3
      4
      5
      6
      7
      8
      9
      ...
      20
      21
      22
      23
      24
      25
      26
      27
      28
      29
    
  
  
    
      0
      0.301581
      1.408314
      1.409227
      -0.173609
      -1.703800
      1.663592
      -0.328853
      0.546714
      -0.853095
      -0.475903
      ...
      0.795107
      0.877714
      1.200536
      -0.533912
      1.020690
      0.940796
      -0.700787
      1.470776
      2.112411
      -0.090783
    
    
      1
      -0.487323
      -0.484605
      -0.419753
      1.258002
      0.423214
      -0.724253
      -1.232197
      2.445035
      -1.378338
      -1.232679
      ...
      -0.772201
      -1.028194
      -0.035316
      0.278069
      -2.562959
      -0.282449
      -2.042751
      -0.023760
      -0.782766
      -1.322814
    
    
      2
      -0.554187
      -1.097552
      -0.697864
      -1.132140
      0.325907
      0.193525
      -0.133474
      -0.413987
      -0.287095
      -1.145795
      ...
      -1.022807
      -0.136579
      -0.009012
      -0.757375
      0.826511
      0.551015
      0.439954
      -0.551893
      -0.062991
      -0.235474
    
    
      3
      0.383414
      0.542371
      0.208094
      -0.077591
      0.181180
      1.113206
      -0.515131
      -0.327328
      -0.037342
      0.818765
      ...
      0.029210
      0.033616
      0.744697
      -0.222526
      0.535837
      -0.433155
      -0.279932
      -1.056352
      -0.083518
      -1.862261
    
    
      4
      0.929047
      1.468983
      1.346298
      -0.342393
      -1.467817
      -1.218745
      0.524839
      1.758517
      -1.354053
      -1.627166
      ...
      0.693371
      0.562292
      1.653235
      1.092434
      0.187117
      0.395255
      -0.270094
      -0.246970
      -0.649252
      0.851364
    
  

5 rows × 30 columns



In [34]:

    
X_train, X_test, y_train, y_test = train_test_split(X, Y)
clsf = xgboost.XGBClassifier(n_estimators=1000)
clsf.fit(X_train, y_train)
prediction = clsf.predict_proba(X_test)



In [36]:

    
fpr, tpr, _ = roc_curve(y_test, prediction[:,1])
plt.plot(fpr, tpr)









    Out[36]:





[<matplotlib.lines.Line2D at 0x7ff5ef1f8b50>]

	0	1	2	3	4	5	6	7	8	9	...	20	21	22	23	24	25	26	27	28	29
0	0.301581	1.408314	1.409227	-0.173609	-1.703800	1.663592	-0.328853	0.546714	-0.853095	-0.475903	...	0.795107	0.877714	1.200536	-0.533912	1.020690	0.940796	-0.700787	1.470776	2.112411	-0.090783
1	-0.487323	-0.484605	-0.419753	1.258002	0.423214	-0.724253	-1.232197	2.445035	-1.378338	-1.232679	...	-0.772201	-1.028194	-0.035316	0.278069	-2.562959	-0.282449	-2.042751	-0.023760	-0.782766	-1.322814
2	-0.554187	-1.097552	-0.697864	-1.132140	0.325907	0.193525	-0.133474	-0.413987	-0.287095	-1.145795	...	-1.022807	-0.136579	-0.009012	-0.757375	0.826511	0.551015	0.439954	-0.551893	-0.062991	-0.235474
3	0.383414	0.542371	0.208094	-0.077591	0.181180	1.113206	-0.515131	-0.327328	-0.037342	0.818765	...	0.029210	0.033616	0.744697	-0.222526	0.535837	-0.433155	-0.279932	-1.056352	-0.083518	-1.862261
4	0.929047	1.468983	1.346298	-0.342393	-1.467817	-1.218745	0.524839	1.758517	-1.354053	-1.627166	...	0.693371	0.562292	1.653235	1.092434	0.187117	0.395255	-0.270094	-0.246970	-0.649252	0.851364