In [75]:
# Importing the libraries
import pandas as pd
import numpy as np

from sklearn.cross_validation import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC

from collections import Counter
from collections import defaultdict
import operator

In [7]:
data = pd.read_csv('Data/trainingdata.txt', header=None)

data.columns = ['team1_hero1',"team1_hero2", "team1_hero3", "team1_hero4", "team1_hero5", "team2_hero1", "team2_hero2","team2_hero3", "team2_hero4", "team2_hero5", "Winning_team"]

labels = data.Winning_team

In [8]:
data.head(10)


/Users/mickaellegal/Documents/VirtualEnv/ENV/lib/python2.7/site-packages/pandas/core/config.py:570: DeprecationWarning: height has been deprecated.

  warnings.warn(d.msg, DeprecationWarning)
/Users/mickaellegal/Documents/VirtualEnv/ENV/lib/python2.7/site-packages/pandas/core/config.py:570: DeprecationWarning: height has been deprecated.

  warnings.warn(d.msg, DeprecationWarning)
/Users/mickaellegal/Documents/VirtualEnv/ENV/lib/python2.7/site-packages/pandas/core/config.py:570: DeprecationWarning: height has been deprecated.

  warnings.warn(d.msg, DeprecationWarning)
Out[8]:
team1_hero1 team1_hero2 team1_hero3 team1_hero4 team1_hero5 team2_hero1 team2_hero2 team2_hero3 team2_hero4 team2_hero5 Winning_team
0 Sven Lone Druid Venomancer Clockwerk Shadow Shaman Invoker Gyrocopter Anti-Mage Alchemist Slark 2
1 Riki Tinker Puck Leshrac Nyx Assassin Slardar Sand King Spectre Necrolyte Warlock 1
2 Invoker Mirana Pudge Magnus Keeper of the Light Rubick Tidehunter Queen of Pain Faceless Void Sniper 2
3 Riki Centaur Warrunner Treant Protector Queen of Pain Broodmother Rubick Weaver Troll Warlord Alchemist Drow Ranger 1
4 Razor Kunkka Drow Ranger Leshrac Zeus Riki Bane Visage Invoker Timbersaw 1
5 Sand King Shadow Shaman Silencer Gyrocopter Meepo Nyx Assassin Slark Mirana Rubick Queen of Pain 2
6 Morphling Omniknight Pudge Crystal Maiden Razor Troll Warlord Tidehunter Lina Dark Seer Gyrocopter 1
7 Brewmaster Leshrac Ogre Magi Kunkka Riki Disruptor Juggernaut Tiny Enigma Bounty Hunter 1
8 Slardar Earthshaker Invoker Slark Doom Necrolyte Timbersaw Bounty Hunter Huskar Razor 1
9 Dazzle Naga Siren Ogre Magi Pudge Nature's Prophet Chen Puck Timbersaw Magnus Ursa 2

In [9]:
# Columns name list
# col_ind =['team1_hero1',"team1_hero2", "team1_hero3", "team1_hero4", "team1_hero5", "team2_hero1", "team2_hero2","team2_hero3", "team2_hero4", "team2_hero5"]

# Getting the name of all the heroes
heroes = set()

for j in data['team1_hero1']:
    heroes.add(j)
    
heroes_dict = defaultdict()

for k,v in enumerate(heroes):
    heroes_dict[v] = k

In [55]:
test = np.genfromtxt('Data/trainingdata.txt', delimiter=',')

In [56]:
test


Out[56]:
array([[ nan,  nan,  nan, ...,  nan,  nan,   2.],
       [ nan,  nan,  nan, ...,  nan,  nan,   1.],
       [ nan,  nan,  nan, ...,  nan,  nan,   2.],
       ..., 
       [ nan,  nan,  nan, ...,  nan,  nan,   2.],
       [ nan,  nan,  nan, ...,  nan,  nan,   1.],
       [ nan,  nan,  nan, ...,  nan,  nan,   2.]])

In [47]:
# Columns name list
col_ind =['team1_hero1',"team1_hero2", "team1_hero3", "team1_hero4", "team1_hero5", "team2_hero1", "team2_hero2","team2_hero3", "team2_hero4", "team2_hero5"]

data2 = pd.DataFrame(data)
data3 = data2

In [50]:
data4 = data3.replace(heroes_dict.keys(), heroes_dict.values())
data4.head()


/Users/mickaellegal/Documents/VirtualEnv/ENV/lib/python2.7/site-packages/pandas/core/config.py:570: DeprecationWarning: height has been deprecated.

  warnings.warn(d.msg, DeprecationWarning)
/Users/mickaellegal/Documents/VirtualEnv/ENV/lib/python2.7/site-packages/pandas/core/config.py:570: DeprecationWarning: height has been deprecated.

  warnings.warn(d.msg, DeprecationWarning)
/Users/mickaellegal/Documents/VirtualEnv/ENV/lib/python2.7/site-packages/pandas/core/config.py:570: DeprecationWarning: height has been deprecated.

  warnings.warn(d.msg, DeprecationWarning)
Out[50]:
team1_hero1 team1_hero2 team1_hero3 team1_hero4 team1_hero5 team2_hero1 team2_hero2 team2_hero3 team2_hero4 team2_hero5 Winning_team
0 58 17 8 65 73 28 43 21 38 88 2
1 27 9 69 31 70 24 62 85 51 55 1
2 28 15 52 50 22 96 93 76 14 29 2
3 27 60 61 76 34 96 89 90 38 83 1
4 0 87 83 31 32 27 7 92 28 75 1

In [197]:
x_train, x_test, y_train, y_test = train_test_split(my_data4, labels, test_size=0.25)

In [203]:
from sklearn.metrics import classification_report
target_names = ['Team 1', 'Team 2']

clfs =[("SVM", SVC(gamma=0.001)),
       ("RandomForest",RandomForestClassifier()),
       ("Nearest Neighbor",KNeighborsClassifier())]

for algo, clf in clfs:
    clf.fit(x_train, y_train)
    preds = clf.predict(x_test)
    accuracy = np.sum(np.where(preds==y_test,1.0,0.0))/ len(y_test)
    y_pred = np.array(preds)
    print(classification_report(y_test, y_pred, target_names=target_names))
    print clf, accuracy


             precision    recall  f1-score   support

     Team 1       0.53      0.60      0.56      1958
     Team 2       0.48      0.41      0.44      1792

avg / total       0.51      0.51      0.50      3750

SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0, degree=3,
  gamma=0.001, kernel=rbf, max_iter=-1, probability=False,
  random_state=None, shrinking=True, tol=0.001, verbose=False) 0.5088
             precision    recall  f1-score   support

     Team 1       1.00      1.00      1.00      1958
     Team 2       1.00      1.00      1.00      1792

avg / total       1.00      1.00      1.00      3750

RandomForestClassifier(bootstrap=True, compute_importances=None,
            criterion=gini, max_depth=None, max_features=auto,
            min_density=None, min_samples_leaf=1, min_samples_split=2,
            n_estimators=10, n_jobs=1, oob_score=False, random_state=None,
            verbose=0) 1.0
             precision    recall  f1-score   support

     Team 1       0.53      0.55      0.54      1958
     Team 2       0.49      0.47      0.48      1792

avg / total       0.51      0.51      0.51      3750

KNeighborsClassifier(algorithm=auto, leaf_size=30, metric=minkowski,
           n_neighbors=5, p=2, weights=uniform) 0.510666666667

In [76]:
my_data2 = np.recfromcsv('Data/trainingdata.txt')

print my_data2[-1]


('Tidehunter', 'Viper', 'Treant Protector', 'Axe', 'Dazzle', 'Magnus', 'Ursa', 'Bloodseeker', 'Omniknight', 'Outworld Devourer', 2)

In [111]:
my_data3 = my_data2

for j in xrange(len(my_data3)):
    for i in range(10):
       my_data3[j][i] = heroes_dict[my_data3[j][i]]

In [139]:
import itertools
list2d = [[1,2,3],[4,5,6], [7], [8,9]]
merged = list(itertools.chain.from_iterable(list2d))

In [168]:
for i in xrange(len(my_data3)):
    my_data3[i] = np.asarray(my_data3[i])

In [188]:
test = np.genfromtxt("Data/trainingdata.txt", names=True, delimiter=",", dtype=None)

In [193]:
my_data4 = np.array(my_data3.tolist())
print my_data4.shape


(14999, 11)

In [196]:
labels = my_data4[:, 10]

In [199]:
heroes2 = set()

for i in my_data4[:, 3]:
    heroes2.add(i)

In [201]:
len(my_data4)


Out[201]:
14999