notebook.community

Edit and run



In [150]:

    
import numpy as np
import pandas as pd
from tqdm import tqdm_notebook as tqdm
%pylab inline









    



Populating the interactive namespace from numpy and matplotlib



In [190]:

    
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, roc_auc_score

from sklearn.cross_validation import train_test_split, KFold, cross_val_score



In [ ]:



In [110]:

    
data = pd.read_csv("../datasets/dota win probability/features.csv")



In [ ]:



In [161]:

    
X = data.drop(["radiant_win", "barracks_status_radiant", "barracks_status_dire", "tower_status_dire", "tower_status_radiant"], 1)
y = data.radiant_win



In [162]:

    
X.sample(5)









    Out[162]:







  
    
      
      match_id
      start_time
      lobby_type
      r1_hero
      r1_level
      r1_xp
      r1_gold
      r1_lh
      r1_kills
      r1_deaths
      ...
      radiant_first_ward_time
      dire_bottle_time
      dire_courier_time
      dire_flying_courier_time
      dire_tpscroll_count
      dire_boots_count
      dire_ward_observer_count
      dire_ward_sentry_count
      dire_first_ward_time
      duration
    
  
  
    
      6267
      7393
      1433298099
      1
      11
      4
      1403
      914
      10
      0
      3
      ...
      -18.0
      94.0
      -82.0
      199.0
      2
      5
      4
      0
      -24.0
      930
    
    
      9419
      11124
      1434914821
      0
      44
      5
      1756
      1393
      22
      0
      0
      ...
      8.0
      NaN
      -86.0
      244.0
      2
      3
      3
      1
      -16.0
      2959
    
    
      90866
      106911
      1449779261
      1
      46
      5
      2048
      1666
      28
      0
      0
      ...
      9.0
      249.0
      -88.0
      183.0
      0
      1
      2
      0
      37.0
      1363
    
    
      93239
      109693
      1449891652
      1
      9
      4
      1423
      1511
      22
      0
      1
      ...
      -5.0
      158.0
      -69.0
      NaN
      0
      2
      2
      1
      7.0
      2459
    
    
      18461
      21738
      1438478732
      0
      7
      3
      768
      677
      0
      0
      0
      ...
      -29.0
      90.0
      -76.0
      182.0
      1
      5
      2
      0
      -38.0
      1819
    
  

5 rows × 104 columns



In [163]:

    
data.describe()









    Out[163]:







  
    
      
      match_id
      start_time
      lobby_type
      r1_hero
      r1_level
      r1_xp
      r1_gold
      r1_lh
      r1_kills
      r1_deaths
      ...
      dire_boots_count
      dire_ward_observer_count
      dire_ward_sentry_count
      dire_first_ward_time
      duration
      radiant_win
      tower_status_radiant
      tower_status_dire
      barracks_status_radiant
      barracks_status_dire
    
  
  
    
      count
      97230.000000
      9.723000e+04
      97230.000000
      97230.000000
      97230.000000
      97230.000000
      97230.000000
      97230.000000
      97230.000000
      97230.000000
      ...
      97230.000000
      97230.000000
      97230.000000
      95404.000000
      97230.000000
      97230.000000
      97230.000000
      97230.000000
      97230.000000
      97230.000000
    
    
      mean
      57185.416744
      1.444232e+09
      2.630999
      51.517104
      3.442672
      1233.405801
      1147.899702
      11.231996
      0.357009
      0.362285
      ...
      3.349553
      2.448339
      0.689119
      -6.901922
      2332.247886
      0.518503
      1309.227790
      1286.310820
      40.599095
      41.337036
    
    
      std
      33007.123878
      5.515393e+06
      2.835761
      32.564211
      1.111741
      566.588895
      464.111662
      9.041620
      0.663889
      0.626704
      ...
      1.155609
      0.813459
      0.710122
      40.701397
      715.806850
      0.499660
      853.921365
      851.009148
      27.871645
      27.064873
    
    
      min
      0.000000
      1.430199e+09
      0.000000
      1.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
      ...
      0.000000
      0.000000
      0.000000
      -84.000000
      900.000000
      0.000000
      0.000000
      0.000000
      0.000000
      0.000000
    
    
      25%
      28545.250000
      1.440815e+09
      1.000000
      22.000000
      3.000000
      767.000000
      746.000000
      2.000000
      0.000000
      0.000000
      ...
      3.000000
      2.000000
      0.000000
      -31.000000
      1818.000000
      0.000000
      36.000000
      256.000000
      3.000000
      3.000000
    
    
      50%
      57160.500000
      1.446338e+09
      1.000000
      50.000000
      3.000000
      1175.000000
      1113.000000
      11.000000
      0.000000
      0.000000
      ...
      3.000000
      2.000000
      1.000000
      -16.000000
      2268.000000
      1.000000
      1824.000000
      1798.000000
      63.000000
      60.000000
    
    
      75%
      85755.750000
      1.448829e+09
      7.000000
      75.000000
      4.000000
      1704.000000
      1479.000000
      19.000000
      1.000000
      1.000000
      ...
      4.000000
      3.000000
      1.000000
      8.000000
      2778.000000
      1.000000
      1974.000000
      1974.000000
      63.000000
      63.000000
    
    
      max
      114406.000000
      1.450313e+09
      7.000000
      112.000000
      6.000000
      3319.000000
      4332.000000
      47.000000
      8.000000
      5.000000
      ...
      9.000000
      9.000000
      13.000000
      300.000000
      8452.000000
      1.000000
      2047.000000
      2047.000000
      63.000000
      63.000000
    
  

8 rows × 109 columns



In [164]:

    
np.sum(X.isna())









    Out[164]:





match_id                           0
start_time                         0
lobby_type                         0
r1_hero                            0
r1_level                           0
r1_xp                              0
r1_gold                            0
r1_lh                              0
r1_kills                           0
r1_deaths                          0
r1_items                           0
r2_hero                            0
r2_level                           0
r2_xp                              0
r2_gold                            0
r2_lh                              0
r2_kills                           0
r2_deaths                          0
r2_items                           0
r3_hero                            0
r3_level                           0
r3_xp                              0
r3_gold                            0
r3_lh                              0
r3_kills                           0
r3_deaths                          0
r3_items                           0
r4_hero                            0
r4_level                           0
r4_xp                              0
                               ...  
d4_items                           0
d5_hero                            0
d5_level                           0
d5_xp                              0
d5_gold                            0
d5_lh                              0
d5_kills                           0
d5_deaths                          0
d5_items                           0
first_blood_time               19553
first_blood_team               19553
first_blood_player1            19553
first_blood_player2            43987
radiant_bottle_time            15691
radiant_courier_time             692
radiant_flying_courier_time    27479
radiant_tpscroll_count             0
radiant_boots_count                0
radiant_ward_observer_count        0
radiant_ward_sentry_count          0
radiant_first_ward_time         1836
dire_bottle_time               16143
dire_courier_time                676
dire_flying_courier_time       26098
dire_tpscroll_count                0
dire_boots_count                   0
dire_ward_observer_count           0
dire_ward_sentry_count             0
dire_first_ward_time            1826
duration                           0
Length: 104, dtype: int64



In [165]:

    
X = X.fillna(0)



In [166]:

    
X, X_test, y, y_test = train_test_split(X, y, stratify=y)



In [197]:

    
kfold = KFold(X.shape[0], n_folds=5)



In [198]:









    Out[198]:





sklearn.cross_validation.KFold(n=72922, n_folds=5, shuffle=False, random_state=None)



In [ ]:



In [169]:

    
train = []
test = []

for i in tqdm(range(1, 16)):
    lr = DecisionTreeClassifier(min_samples_leaf=5, max_depth=i)
    lr.fit(X, y)
    train.append(accuracy_score(lr.predict(X), y) * 100)
    test.append(accuracy_score(lr.predict(X_test), y_test) * 100)

plot(train, c="red")
plot(test, c="green")









    





 
 










    









    Out[169]:





[<matplotlib.lines.Line2D at 0x2201ee2b898>]



In [ ]:



In [184]:

    
dt = DecisionTreeClassifier(min_samples_leaf=5, max_depth=7)



In [185]:

    
dt.fit(X, y)









    Out[185]:





DecisionTreeClassifier(class_weight=None, criterion='gini', max_depth=7,
            max_features=None, max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=5, min_samples_split=2,
            min_weight_fraction_leaf=0.0, presort=False, random_state=None,
            splitter='best')



In [186]:

    
accuracy_score(dt.predict(X), y) * 100 , accuracy_score(dt.predict(X_test), y_test) * 100









    Out[186]:





(61.845533583829294, 59.091657067632056)



In [ ]:

    
accuracy_score()



In [187]:

    
rf = RandomForestClassifier(n_jobs=8)



In [188]:

    
rf.fit(X, y)









    Out[188]:





RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=8,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False)



In [189]:

    
accuracy_score(rf.predict(X), y) * 100, accuracy_score(rf.predict(X_test), y_test) * 100









    Out[189]:





(98.77677518444365, 60.24765509297351)



In [ ]:



In [202]:

    
gb = GradientBoostingClassifier()



In [203]:

    
gb.fit(X, y)









    Out[203]:





GradientBoostingClassifier(criterion='friedman_mse', init=None,
              learning_rate=0.1, loss='deviance', max_depth=3,
              max_features=None, max_leaf_nodes=None,
              min_impurity_decrease=0.0, min_impurity_split=None,
              min_samples_leaf=1, min_samples_split=2,
              min_weight_fraction_leaf=0.0, n_estimators=100,
              presort='auto', random_state=None, subsample=1.0, verbose=0,
              warm_start=False)



In [205]:

    
accuracy_score(gb.predict(X), y) * 100, accuracy_score(gb.predict(X_test), y_test) * 100









    Out[205]:





(66.32703436548641, 64.41089353299326)



In [ ]:



In [201]:

    
np.mean(cross_val_score(rf, X, y, cv=5, n_jobs=8))









    Out[201]:





0.6023148815905249



In [ ]:

	match_id	start_time	lobby_type	r1_hero	r1_level	r1_xp	r1_gold	r1_lh	r1_deaths	...	radiant_first_ward_time	dire_bottle_time	dire_courier_time	dire_flying_courier_time	dire_tpscroll_count	dire_boots_count	dire_ward_observer_count	dire_ward_sentry_count	dire_first_ward_time	duration
6267	7393	1433298099	1	11	4	1403	914	10	3	...	-18.0	94.0	-82.0	199.0	2	5	4	0	-24.0	930
9419	11124	1434914821	0	44	5	1756	1393	22	0	...	8.0	NaN	-86.0	244.0	2	3	3	1	-16.0	2959
90866	106911	1449779261	1	46	5	2048	1666	28	0	...	9.0	249.0	-88.0	183.0	0	1	2	0	37.0	1363
93239	109693	1449891652	1	9	4	1423	1511	22	1	...	-5.0	158.0	-69.0	NaN	0	2	2	1	7.0	2459
18461	21738	1438478732	0	7	3	768	677	0	0	...	-29.0	90.0	-76.0	182.0	1	5	2	0	-38.0	1819

	match_id	start_time	lobby_type	r1_hero	r1_level	r1_xp	r1_gold	r1_lh	r1_kills	r1_deaths	...	dire_boots_count	dire_ward_observer_count	dire_ward_sentry_count	dire_first_ward_time	duration	radiant_win	tower_status_radiant	tower_status_dire	barracks_status_radiant	barracks_status_dire
count	97230.000000	9.723000e+04	97230.000000	97230.000000	97230.000000	97230.000000	97230.000000	97230.000000	97230.000000	97230.000000	...	97230.000000	97230.000000	97230.000000	95404.000000	97230.000000	97230.000000	97230.000000	97230.000000	97230.000000	97230.000000
mean	57185.416744	1.444232e+09	2.630999	51.517104	3.442672	1233.405801	1147.899702	11.231996	0.357009	0.362285	...	3.349553	2.448339	0.689119	-6.901922	2332.247886	0.518503	1309.227790	1286.310820	40.599095	41.337036
std	33007.123878	5.515393e+06	2.835761	32.564211	1.111741	566.588895	464.111662	9.041620	0.663889	0.626704	...	1.155609	0.813459	0.710122	40.701397	715.806850	0.499660	853.921365	851.009148	27.871645	27.064873
min	0.000000	1.430199e+09	0.000000	1.000000	0.000000	0.000000	0.000000	0.000000	0.000000	0.000000	...	0.000000	0.000000	0.000000	-84.000000	900.000000	0.000000	0.000000	0.000000	0.000000	0.000000
25%	28545.250000	1.440815e+09	1.000000	22.000000	3.000000	767.000000	746.000000	2.000000	0.000000	0.000000	...	3.000000	2.000000	0.000000	-31.000000	1818.000000	0.000000	36.000000	256.000000	3.000000	3.000000
50%	57160.500000	1.446338e+09	1.000000	50.000000	3.000000	1175.000000	1113.000000	11.000000	0.000000	0.000000	...	3.000000	2.000000	1.000000	-16.000000	2268.000000	1.000000	1824.000000	1798.000000	63.000000	60.000000
75%	85755.750000	1.448829e+09	7.000000	75.000000	4.000000	1704.000000	1479.000000	19.000000	1.000000	1.000000	...	4.000000	3.000000	1.000000	8.000000	2778.000000	1.000000	1974.000000	1974.000000	63.000000	63.000000
max	114406.000000	1.450313e+09	7.000000	112.000000	6.000000	3319.000000	4332.000000	47.000000	8.000000	5.000000	...	9.000000	9.000000	13.000000	300.000000	8452.000000	1.000000	2047.000000	2047.000000	63.000000	63.000000