In this file I'm gonna try to find appropriate description and distance functions for heroes. This is needed to apply Collaborative filtering in the future.


In [1]:
import json
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import (euclidean_distances,
                                      manhattan_distances,
                                      cosine_similarity,
                                      cosine_distances)
from itertools import combinations, product
from atod import Heroes, Hero

In [2]:
heroes = Heroes.all()
data = pd.DataFrame([h.get_description(['name', 'laning', 'role', 'type'])
                     for h in heroes])
data.head()


No abilities for this HeroID == 16
Out[2]:
name laning_provides_babysit laning_provides_setup laning_requires_babysit laning_requires_farm laning_requires_setup laning_solo_desire laning_survival_rating role_disabler role_nuker ... role_jungler role_carry type_ganker type_hard_carry type_nuker type_pure_support type_push_support type_semi_carry type_stun_support type_tank
0 Faceless Void 0 0 2 2 2 0 1 2.0 0.0 ... 0.0 2.0 0 1 0 0 0 0 0 0
1 Tidehunter 1 1 0 0 0 1 2 2.0 1.0 ... 0.0 0.0 0 0 0 0 0 0 1 1
2 Tinker 2 0 0 1 1 1 1 0.0 3.0 ... 0.0 1.0 1 0 0 0 1 0 0 0
3 Vengeful Spirit 2 2 0 0 0 0 1 2.0 1.0 ... 0.0 0.0 1 0 0 0 0 0 1 0
4 Anti-Mage 0 0 2 2 1 1 2 0.0 1.0 ... 0.0 3.0 0 1 0 0 0 0 0 0

5 rows × 25 columns


In [3]:
def hero_vector(name: str):
    return data[data['name'] == name].drop(['name'], axis=1)

In [4]:
# find all the closest heroes to the 
puck = hero_vector('Io')
distances = list()

for hero in heroes:
    distances.append((hero.name, 
                      cosine_similarity(puck, hero_vector(hero.name))[0][0]))
    
distances = list(reversed(sorted(distances, key=lambda x: x[1])))
print(distances[:10])


[('Io', 0.99999999999999978), ('Keeper of the Light', 0.83650191257130413), ('Lich', 0.82478609884232257), ('Omniknight', 0.80317450514460775), ('Dazzle', 0.78354679390020654), ('Mirana', 0.75592894601845451), ('Abaddon', 0.72524066762284223), ('Bounty Hunter', 0.71838111651923897), ('Oracle', 0.7127864449672372), ('Visage', 0.70929936561519058)]

Second idea

Something like hero2vec: from 48k professional matches, if heroes were used together and won, add won_weight to $hero2vec[i][j]$, where $i$ - id of the first hero, $j$ - id of the second hero for all pairs of heroes in pick.


In [5]:
with open('data/players_in_matches.json') as fp:
    players_in_matches = json.load(fp)

In [6]:
matches = dict()

for record in players_in_matches:
    # create match in matches dictionary with arrays for
    # winners and losers ids
    matches.setdefault(str(record['match_id']), 
                       {
                        'winners': [],
                        'loosers': [],
                       }
                      )
    if record['win']:
        # add hero to winners of this match
        matches[str(record['match_id'])]['winners'].append(record['hero_id'])
    else:
        # add hero to losers
        matches[str(record['match_id'])]['loosers'].append(record['hero_id'])

# length of matches should be 10 times smaller than length of players...
# since there are 10 players in each match
assert len(matches), len(players_in_matches) / 10

In [7]:
n_heroes = 115
hero2vec = np.zeros((n_heroes, n_heroes))

for match in matches.values():
    # for winners
    # sorting is needed to have upper traingular matrix
    # combinations produces all heroes pairs with smaller id first
    for hero1, hero2 in product(match['winners'], repeat=2):
        hero2vec[hero1][hero2] += 1
        
    for hero1, hero2 in product(match['loosers'], repeat=2):
        hero2vec[hero1][hero2] += .75

In [10]:
id1 = Hero.from_name('Naga Siren').id
id2 = Hero.from_name('Anti-Mage').id
id3 = Hero.from_name('Beastmaster').id
id4 = Hero.from_name('Dazzle').id
id5 = Hero.from_name('Oracle').id

# print(hero2vec[id1])

print(cosine_distances(hero2vec[id1], hero2vec[id2]))
print(cosine_distances(hero2vec[id2], hero2vec[id3]))
print(cosine_distances(hero2vec[id1], hero2vec[id3]))
print(cosine_distances(hero2vec[id4], hero2vec[id5]))


[[ 0.78849408]]
[[ 0.76660642]]
[[ 0.81325883]]
[[ 0.84008312]]
/Users/gasabr/.virtualenvs/dota/lib/python3.5/site-packages/sklearn/utils/validation.py:395: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
  DeprecationWarning)
/Users/gasabr/.virtualenvs/dota/lib/python3.5/site-packages/sklearn/utils/validation.py:395: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
  DeprecationWarning)
/Users/gasabr/.virtualenvs/dota/lib/python3.5/site-packages/sklearn/utils/validation.py:395: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
  DeprecationWarning)
/Users/gasabr/.virtualenvs/dota/lib/python3.5/site-packages/sklearn/utils/validation.py:395: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
  DeprecationWarning)
/Users/gasabr/.virtualenvs/dota/lib/python3.5/site-packages/sklearn/utils/validation.py:395: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
  DeprecationWarning)
/Users/gasabr/.virtualenvs/dota/lib/python3.5/site-packages/sklearn/utils/validation.py:395: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
  DeprecationWarning)
/Users/gasabr/.virtualenvs/dota/lib/python3.5/site-packages/sklearn/utils/validation.py:395: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
  DeprecationWarning)
/Users/gasabr/.virtualenvs/dota/lib/python3.5/site-packages/sklearn/utils/validation.py:395: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
  DeprecationWarning)

In [ ]: