In [150]:
import numpy as np
import pandas as pd
from tqdm import tqdm_notebook as tqdm
%pylab inline
In [190]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score, roc_auc_score
# sklearn.cross_validation was removed in scikit-learn 0.20; these utilities now
# live in sklearn.model_selection (where KFold takes n_splits=, not the sample count).
from sklearn.model_selection import train_test_split, KFold, cross_val_score
In [ ]:
In [110]:
# Load the precomputed Dota match features (one row per match).
# NOTE(review): relative path assumes the notebook is launched from its own
# directory — consider a configurable DATA_DIR constant.
data = pd.read_csv("../datasets/dota win probability/features.csv")
In [ ]:
In [161]:
# Target is radiant_win. Also drop the end-of-match status columns: they are only
# known once the game is over, so keeping them would leak the outcome into X.
# Positional `axis` in DataFrame.drop is deprecated (removed in pandas 2.x) —
# use the explicit `columns=` keyword instead.
X = data.drop(columns=["radiant_win", "barracks_status_radiant", "barracks_status_dire", "tower_status_dire", "tower_status_radiant"])
y = data.radiant_win
In [162]:
# Peek at 5 random rows of the feature matrix (rich display, no print needed).
X.sample(5)
Out[162]:
In [163]:
# Summary statistics over the full raw frame (note: includes the target and the
# leaked status columns that were dropped from X above).
data.describe()
Out[163]:
In [164]:
# Number of missing values per feature column.
X.isna().sum()
Out[164]:
In [165]:
# Replace missing values with 0.
# NOTE(review): zero-filling assumes NaN means "event never happened in the match"
# — confirm that holds for every column flagged as missing above; for time-like
# features a sentinel or imputation may be more appropriate.
X = X.fillna(0)
In [166]:
# Hold out a stratified test set (default 25%); stratify keeps class balance.
# random_state pins the split so every downstream score is reproducible.
# NOTE(review): X/y are rebound to the *train* portion here — rerunning this cell
# without rerunning the cells above shrinks the data each time.
X, X_test, y, y_test = train_test_split(X, y, stratify=y, random_state=42)
In [197]:
# The removed sklearn.cross_validation API took the sample count as the first
# argument; model_selection.KFold only needs the number of splits.
# NOTE(review): `kfold` is not used below (cross_val_score is called with cv=5) —
# either pass it as cv=kfold or delete this cell.
kfold = KFold(n_splits=5)
In [198]:
Out[198]:
In [ ]:
In [169]:
# Validation curve over tree depth: train vs. held-out accuracy (%), to pick
# max_depth for the final model.
train = []
test = []
for depth in tqdm(range(1, 16)):
    # `tree`, not `lr` — the original name suggested a logistic regression.
    # random_state pins the tie-breaking randomness of the split search.
    tree = DecisionTreeClassifier(min_samples_leaf=5, max_depth=depth, random_state=42)
    tree.fit(X, y)
    # accuracy_score convention is (y_true, y_pred); the metric is symmetric,
    # but the conventional order reads correctly.
    train.append(accuracy_score(y, tree.predict(X)) * 100)
    test.append(accuracy_score(y_test, tree.predict(X_test)) * 100)
# Plot against the actual depths (the original plotted against list index,
# shifting the x-axis by one) and label the figure so it stands alone.
plt.plot(range(1, 16), train, c="red", label="train")
plt.plot(range(1, 16), test, c="green", label="test")
plt.xlabel("max_depth")
plt.ylabel("accuracy, %")
plt.legend();
Out[169]:
In [ ]:
In [184]:
# Final tree — max_depth=7 presumably chosen from the depth sweep above (its
# outputs are not visible here; confirm against the curve).
# random_state added for reproducibility.
dt = DecisionTreeClassifier(min_samples_leaf=5, max_depth=7, random_state=42)
In [185]:
# Fit the depth-7 tree on the training split.
dt.fit(X, y)
Out[185]:
In [186]:
# Train vs. test accuracy (%) for the depth-7 tree; a large gap means overfitting.
dt_train_acc = 100 * accuracy_score(dt.predict(X), y)
dt_test_acc = 100 * accuracy_score(dt.predict(X_test), y_test)
dt_train_acc, dt_test_acc
Out[186]:
In [ ]:
# (removed) This cell contained a stray `accuracy_score()` call with no arguments,
# which always raises TypeError. Dead scratch code — deleted so the notebook
# survives Restart & Run All.
In [187]:
# Random-forest baseline; n_jobs=8 parallelizes tree fitting across cores.
# random_state makes the forest (and its scores) reproducible.
rf = RandomForestClassifier(n_jobs=8, random_state=42)
In [188]:
# Fit the random forest on the training split.
rf.fit(X, y)
Out[188]:
In [189]:
# Train vs. test accuracy (%) for the random forest.
rf_train_acc = 100 * accuracy_score(rf.predict(X), y)
rf_test_acc = 100 * accuracy_score(rf.predict(X_test), y_test)
rf_train_acc, rf_test_acc
Out[189]:
In [ ]:
In [202]:
# Gradient-boosting baseline with default hyperparameters.
# random_state added so subsample/feature randomness is reproducible.
gb = GradientBoostingClassifier(random_state=42)
In [203]:
# Fit the gradient-boosting model on the training split.
gb.fit(X, y)
Out[203]:
In [205]:
# Train vs. test accuracy (%) for the gradient-boosting model.
gb_train_acc = 100 * accuracy_score(gb.predict(X), y)
gb_test_acc = 100 * accuracy_score(gb.predict(X_test), y_test)
gb_train_acc, gb_test_acc
Out[205]:
In [ ]:
In [201]:
# Mean 5-fold cross-validated accuracy of the random forest on the training split.
cross_val_score(rf, X, y, cv=5, n_jobs=8).mean()
Out[201]:
In [ ]: