In [75]:
# Importing the libraries
import pandas as pd
import numpy as np
from sklearn.cross_validation import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import SVC
from collections import Counter
from collections import defaultdict
import operator
In [7]:
# Read the raw match records. header=None tells pandas the file carries no
# header row, so the eleven column names are assigned by hand below.
data = pd.read_csv('Data/trainingdata.txt', header=None)
data.columns = [
    'team1_hero1', "team1_hero2", "team1_hero3", "team1_hero4", "team1_hero5",
    "team2_hero1", "team2_hero2", "team2_hero3", "team2_hero4", "team2_hero5",
    "Winning_team",
]
# The last column is the prediction target.
labels = data["Winning_team"]
In [8]:
# Preview the first ten rows of the loaded frame.
data.head(n=10)
Out[8]:
In [9]:
# The ten hero-slot columns; the eleventh column (Winning_team) is the label
# and must not contribute to the hero vocabulary.
hero_columns = ['team1_hero1', "team1_hero2", "team1_hero3", "team1_hero4", "team1_hero5",
                "team2_hero1", "team2_hero2", "team2_hero3", "team2_hero4", "team2_hero5"]

# Gather every distinct hero across all ten slots. Looking at team1_hero1
# alone would leave any hero that never appears in that slot without a code,
# so later lookups in heroes_dict would fail or skip it.
heroes = set()
for column in hero_columns:
    heroes.update(data[column])

# Map hero -> integer code. Sorting pins the codes from run to run (set
# iteration order is arbitrary). A plain dict raises KeyError for unknown
# heroes exactly as the factory-less defaultdict() did.
heroes_dict = {hero: code for code, hero in enumerate(sorted(heroes))}
In [55]:
# Scratch load of the same file with numpy. No dtype is given, so genfromtxt
# parses every field as float (its documented default).
# NOTE(review): any non-numeric fields will presumably come back as nan — confirm.
test = np.genfromtxt(fname='Data/trainingdata.txt', delimiter=',')
In [56]:
# Display the array bound to `test` as this cell's output.
test
Out[56]:
In [47]:
# Names of the ten hero-slot columns (everything except the label column).
col_ind = [
    'team1_hero1', "team1_hero2", "team1_hero3", "team1_hero4", "team1_hero5",
    "team2_hero1", "team2_hero2", "team2_hero3", "team2_hero4", "team2_hero5",
]
# data2 wraps the loaded frame in a DataFrame; data3 is a second name bound
# to that very same object (no copy is made by the assignment).
data3 = data2 = pd.DataFrame(data)
In [50]:
# Swap each hero for its integer code. keys() and values() of the same
# unmodified dict line up pairwise, which is what the list form of replace needs.
hero_names = list(heroes_dict.keys())
hero_codes = list(heroes_dict.values())
data4 = data3.replace(to_replace=hero_names, value=hero_codes)
# Show the first rows of the encoded frame.
data4.head()
Out[50]:
In [197]:
# Hold out 25% of the matches for evaluation.
# Only the first ten columns (the hero slots) are used as features: column 10
# of my_data4 is the winning-team label itself, and passing the whole array
# would hand the answer to every classifier.
# random_state is fixed so the split (and the reported scores) can be repeated.
x_train, x_test, y_train, y_test = train_test_split(
    my_data4[:, :10], labels, test_size=0.25, random_state=42)
In [203]:
from sklearn.metrics import classification_report

# Display names for the two classes in the per-class report.
target_names = ['Team 1', 'Team 2']

# (label, estimator) pairs to compare on the same train/test split.
clfs = [
    ("SVM", SVC(gamma=0.001)),
    ("RandomForest", RandomForestClassifier()),
    ("Nearest Neighbor", KNeighborsClassifier()),
]

for algo, clf in clfs:
    clf.fit(x_train, y_train)
    preds = clf.predict(x_test)
    # Fraction of test rows predicted correctly (mean of the boolean matches).
    accuracy = np.mean(preds == y_test)
    y_pred = np.asarray(preds)
    print(classification_report(y_test, y_pred, target_names=target_names))
    # Single-argument print() emits the same "<estimator> <accuracy>" text on
    # Python 2 and Python 3; the bare print statement is a SyntaxError on 3.
    print("%s %s" % (clf, accuracy))
In [76]:
# Load the matches as a record array (one record per row, mixed field types).
# recfromcsv defaults to names=True, which treats the first line as a header.
# This file is read elsewhere with header=None, i.e. it has no header row, so
# names=None is passed to keep the first match as data instead of losing it.
my_data2 = np.recfromcsv('Data/trainingdata.txt', names=None)
# Sanity check: show the last record.
print(my_data2[-1])
In [111]:
# Work on a copy. Plain assignment would only alias my_data2, so the in-place
# encoding below would overwrite the raw records, and re-running this cell
# would then look up already-encoded values in heroes_dict and raise KeyError.
my_data3 = my_data2.copy()
for row_idx in range(len(my_data3)):
    # Fields 0-9 are the ten hero slots; replace each hero with its code.
    for slot in range(10):
        my_data3[row_idx][slot] = heroes_dict[my_data3[row_idx][slot]]
In [139]:
import itertools

# Scratch example: flatten a ragged list of lists into a single flat list.
list2d = [[1, 2, 3], [4, 5, 6], [7], [8, 9]]
merged = list(itertools.chain(*list2d))
In [168]:
# Pass every record through np.asarray and store the result back in place.
# NOTE(review): this looks like a no-op on the stored values — confirm it is
# still needed before relying on it.
for row_idx in range(len(my_data3)):
    record = np.asarray(my_data3[row_idx])
    my_data3[row_idx] = record
In [188]:
# Scratch load with per-column type inference (dtype=None) and named fields.
# NOTE(review): names=True makes genfromtxt read the first line as field
# names; if the file has no header row, that line is dropped from the data —
# confirm this is intended.
test = np.genfromtxt(
    "Data/trainingdata.txt",
    delimiter=",",
    dtype=None,
    names=True,
)
In [193]:
# Turn the record array into a plain 2-D array: tolist() yields one tuple per
# record, and np.array stacks those tuples into rows.
record_tuples = my_data3.tolist()
my_data4 = np.array(record_tuples)
# Report (rows, columns) of the result.
print(my_data4.shape)
In [196]:
# Column index 10 is the eleventh field of each row; take it as the target.
label_column = 10
labels = my_data4[:, label_column]
In [199]:
# Distinct values found in column index 3 of the encoded array.
heroes2 = set(my_data4[:, 3])
In [201]:
# Number of rows in the encoded array.
my_data4.shape[0]
Out[201]: