In this file I'm gonna try to find appropriate description and distance functions for heroes. This is needed to apply Collaborative filtering in the future.



In [1]:

    
import json
import numpy as np
import pandas as pd
from sklearn.metrics.pairwise import (euclidean_distances,
                                      manhattan_distances,
                                      cosine_similarity,
                                      cosine_distances)
from itertools import combinations, product
from atod import Heroes, Hero



In [2]:

    
heroes = Heroes.all()
data = pd.DataFrame([h.get_description(['name', 'laning', 'role', 'type'])
                     for h in heroes])
data.head()









    



No abilities for this HeroID == 16






    Out[2]:







  
    
      
      name
      laning_provides_babysit
      laning_provides_setup
      laning_requires_babysit
      laning_requires_farm
      laning_requires_setup
      laning_solo_desire
      laning_survival_rating
      role_disabler
      role_nuker
      ...
      role_jungler
      role_carry
      type_ganker
      type_hard_carry
      type_nuker
      type_pure_support
      type_push_support
      type_semi_carry
      type_stun_support
      type_tank
    
  
  
    
      0
      Faceless Void
      0
      0
      2
      2
      2
      0
      1
      2.0
      0.0
      ...
      0.0
      2.0
      0
      1
      0
      0
      0
      0
      0
      0
    
    
      1
      Tidehunter
      1
      1
      0
      0
      0
      1
      2
      2.0
      1.0
      ...
      0.0
      0.0
      0
      0
      0
      0
      0
      0
      1
      1
    
    
      2
      Tinker
      2
      0
      0
      1
      1
      1
      1
      0.0
      3.0
      ...
      0.0
      1.0
      1
      0
      0
      0
      1
      0
      0
      0
    
    
      3
      Vengeful Spirit
      2
      2
      0
      0
      0
      0
      1
      2.0
      1.0
      ...
      0.0
      0.0
      1
      0
      0
      0
      0
      0
      1
      0
    
    
      4
      Anti-Mage
      0
      0
      2
      2
      1
      1
      2
      0.0
      1.0
      ...
      0.0
      3.0
      0
      1
      0
      0
      0
      0
      0
      0
    
  

5 rows × 25 columns



In [3]:

    
def hero_vector(name: str):
    return data[data['name'] == name].drop(['name'], axis=1)



In [4]:

    
# find all the closest heroes to the 
puck = hero_vector('Io')
distances = list()

for hero in heroes:
    distances.append((hero.name, 
                      cosine_similarity(puck, hero_vector(hero.name))[0][0]))
    
distances = list(reversed(sorted(distances, key=lambda x: x[1])))
print(distances[:10])









    



[('Io', 0.99999999999999978), ('Keeper of the Light', 0.83650191257130413), ('Lich', 0.82478609884232257), ('Omniknight', 0.80317450514460775), ('Dazzle', 0.78354679390020654), ('Mirana', 0.75592894601845451), ('Abaddon', 0.72524066762284223), ('Bounty Hunter', 0.71838111651923897), ('Oracle', 0.7127864449672372), ('Visage', 0.70929936561519058)]

Second idea

Something like hero2vec: from 48k professional matches, if heroes were used together and won, add won_weight to $hero2vec[i][j]$, where $i$ - id of the first hero, $j$ - id of the second hero for all pairs of heroes in pick.



In [5]:

    
with open('data/players_in_matches.json') as fp:
    players_in_matches = json.load(fp)



In [6]:

    
matches = dict()

for record in players_in_matches:
    # create match in matches dictionary with arrays for
    # winners and losers ids
    matches.setdefault(str(record['match_id']), 
                       {
                        'winners': [],
                        'loosers': [],
                       }
                      )
    if record['win']:
        # add hero to winners of this match
        matches[str(record['match_id'])]['winners'].append(record['hero_id'])
    else:
        # add hero to losers
        matches[str(record['match_id'])]['loosers'].append(record['hero_id'])

# length of matches should be 10 times smaller than length of players...
# since there are 10 players in each match
assert len(matches), len(players_in_matches) / 10



In [7]:

    
n_heroes = 115
hero2vec = np.zeros((n_heroes, n_heroes))

for match in matches.values():
    # for winners
    # sorting is needed to have upper traingular matrix
    # combinations produces all heroes pairs with smaller id first
    for hero1, hero2 in product(match['winners'], repeat=2):
        hero2vec[hero1][hero2] += 1
        
    for hero1, hero2 in product(match['loosers'], repeat=2):
        hero2vec[hero1][hero2] += .75



In [10]:

    
id1 = Hero.from_name('Naga Siren').id
id2 = Hero.from_name('Anti-Mage').id
id3 = Hero.from_name('Beastmaster').id
id4 = Hero.from_name('Dazzle').id
id5 = Hero.from_name('Oracle').id

# print(hero2vec[id1])

print(cosine_distances(hero2vec[id1], hero2vec[id2]))
print(cosine_distances(hero2vec[id2], hero2vec[id3]))
print(cosine_distances(hero2vec[id1], hero2vec[id3]))
print(cosine_distances(hero2vec[id4], hero2vec[id5]))









    



[[ 0.78849408]]
[[ 0.76660642]]
[[ 0.81325883]]
[[ 0.84008312]]






    



/Users/gasabr/.virtualenvs/dota/lib/python3.5/site-packages/sklearn/utils/validation.py:395: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
  DeprecationWarning)
/Users/gasabr/.virtualenvs/dota/lib/python3.5/site-packages/sklearn/utils/validation.py:395: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
  DeprecationWarning)
/Users/gasabr/.virtualenvs/dota/lib/python3.5/site-packages/sklearn/utils/validation.py:395: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
  DeprecationWarning)
/Users/gasabr/.virtualenvs/dota/lib/python3.5/site-packages/sklearn/utils/validation.py:395: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
  DeprecationWarning)
/Users/gasabr/.virtualenvs/dota/lib/python3.5/site-packages/sklearn/utils/validation.py:395: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
  DeprecationWarning)
/Users/gasabr/.virtualenvs/dota/lib/python3.5/site-packages/sklearn/utils/validation.py:395: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
  DeprecationWarning)
/Users/gasabr/.virtualenvs/dota/lib/python3.5/site-packages/sklearn/utils/validation.py:395: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
  DeprecationWarning)
/Users/gasabr/.virtualenvs/dota/lib/python3.5/site-packages/sklearn/utils/validation.py:395: DeprecationWarning: Passing 1d arrays as data is deprecated in 0.17 and will raise ValueError in 0.19. Reshape your data either using X.reshape(-1, 1) if your data has a single feature or X.reshape(1, -1) if it contains a single sample.
  DeprecationWarning)



In [ ]:

	name	laning_provides_babysit	laning_provides_setup	laning_requires_babysit	laning_requires_farm	laning_requires_setup	laning_solo_desire	laning_survival_rating	role_disabler	role_nuker	...	role_carry	type_ganker	type_hard_carry	type_push_support	type_stun_support	type_tank
0	Faceless Void	0	0	2	2	2	0	1	2.0	0.0	...	2.0	0	1	0	0	0
1	Tidehunter	1	1	0	0	0	1	2	2.0	1.0	...	0.0	0	0	0	1	1
2	Tinker	2	0	0	1	1	1	1	0.0	3.0	...	1.0	1	0	1	0	0
3	Vengeful Spirit	2	2	0	0	0	0	1	2.0	1.0	...	0.0	1	0	0	1	0
4	Anti-Mage	0	0	2	2	1	1	2	0.0	1.0	...	3.0	0	1	0	0	0