In [1]:

    
import json
import numpy as np
import pandas as pd
from itertools import combinations, product

from atod import Hero, Heroes



In [2]:

    
n_heroes = 115



In [3]:

    
with open('data/players_in_matches.json', 'r') as fp:
    players_in_matches = json.load(fp)



In [4]:

    
# TODO:
# Print some info about dataset:
#  * first match date
#  * last match date
#  * number of matches



In [5]:

    
# TODO:
# def how_many_nans(X: pd.DataFrame) -> int:



In [6]:

    
matches = dict()

for record in players_in_matches:
    # create match in matches dictionary with arrays for
    # winners and losers ids
    matches.setdefault(str(record['match_id']), 
                       {
                        'winners': [],
                        'loosers': [],
                       }
                      )
    if record['win']:
        # add hero to winners of this match
        matches[str(record['match_id'])]['winners'].append(record['hero_id'])
    else:
        # add hero to losers
        matches[str(record['match_id'])]['loosers'].append(record['hero_id'])

# length of matches should be 10 times smaller than length of players...
# since there are 10 players in each match
assert len(matches), len(players_in_matches) / 10



In [7]:

    
# crete and fill 
# TODO: rename matrices
matches_together = np.zeros((n_heroes, n_heroes))
matches_won = np.zeros((n_heroes, n_heroes))
matches_lost = np.zeros((n_heroes, n_heroes))
matches_against = np.zeros((n_heroes, n_heroes))

for match in matches.values():
    # for winners
    # sorting is needed to have upper traingular matrix
    # combinations produces all heroes pairs with smaller id first
    for hero1, hero2 in combinations(sorted(match['winners']), 2):
        matches_together[hero1][hero2] += 1
        matches_won[hero1][hero2] += 1
        
    for hero1, hero2 in combinations(sorted(match['loosers']), 2):
        matches_together[hero1][hero2] += 1
        
    for looser, winner in product(match['loosers'], match['winners']):
        matches_against[looser][winner] += 1
        matches_against[winner][looser] += 1
        matches_lost[looser][winner] += 1



In [8]:

    
# minimum number of matches for pair of heroes to be included in dataset
min_matches_played = 10
max_winrate = .65
max_matches_together = max([max(a) for a in matches_together])
were_nulls = sum([a.shape[0] - np.count_nonzero(a) for a in matches_together])

# if combination of 2 heroes were used less than `min_matches` times,
# don't count their win(lose)rate (it would be NaN in result matrix)
matches_together[matches_together < min_matches_played] = np.NaN
matches_together[matches_together > max_winrate] = max_winrate
matches_against[matches_against < min_matches_played] = np.NaN

become_nulls = sum([a.shape[0] - np.count_nonzero(a) for a in matches_together])

print(become_nulls - were_nulls)









    



-6952






    



/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/ipykernel/__main__.py:10: RuntimeWarning: invalid value encountered in greater



In [9]:

    
# find maximum amount of matches played by 2 heroes
max_matches_played = np.nanmax([np.nanmax(hero) 
                                for hero in matches_together])

# some combinations were played more than another, so
# there is more confidence in picking this kind of heroes (tiny-wi)









    



/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/ipykernel/__main__.py:3: RuntimeWarning: All-NaN slice encountered
  app.launch_new_instance()



In [10]:

    
winrate_ = (matches_won / matches_together) * (1 + matches_together / max_matches_played)
winrate = pd.DataFrame(winrate_)
winrate.dropna(axis=0, how='all', inplace=True)
winrate.dropna(axis=1, how='all', inplace=True)
winrate.head()









    Out[10]:






  
    
      
      2
      3
      4
      5
      6
      7
      8
      9
      10
      11
      ...
      105
      106
      107
      108
      109
      110
      111
      112
      113
      114
    
  
  
    
      1
      212.307692
      221.538462
      NaN
      249.230769
      NaN
      489.230769
      73.846154
      258.461538
      NaN
      249.230769
      ...
      NaN
      110.769231
      138.461538
      NaN
      NaN
      83.076923
      126.153846
      243.076923
      NaN
      18.461538
    
    
      2
      NaN
      156.923077
      36.923077
      304.615385
      264.615385
      326.153846
      760.000000
      606.153846
      347.692308
      587.692308
      ...
      NaN
      280.000000
      301.538462
      NaN
      196.923077
      212.307692
      227.692308
      147.692308
      NaN
      76.923077
    
    
      3
      NaN
      NaN
      46.153846
      150.769231
      200.000000
      286.153846
      393.846154
      1360.000000
      113.846154
      578.461538
      ...
      NaN
      443.076923
      132.307692
      15.384615
      73.846154
      67.692308
      46.153846
      141.538462
      NaN
      NaN
    
    
      4
      NaN
      NaN
      NaN
      70.769231
      NaN
      221.538462
      46.153846
      86.153846
      12.307692
      86.153846
      ...
      12.307692
      55.384615
      27.692308
      NaN
      NaN
      46.153846
      27.692308
      107.692308
      NaN
      NaN
    
    
      5
      NaN
      NaN
      NaN
      NaN
      83.076923
      384.615385
      886.153846
      553.846154
      264.615385
      584.615385
      ...
      NaN
      510.769231
      446.153846
      55.384615
      224.615385
      76.923077
      18.461538
      46.153846
      NaN
      209.230769
    
  

5 rows × 112 columns



In [11]:

    
lose_rate_ = matches_lost / matches_against
lose_rate = pd.DataFrame(lose_rate_)
lose_rate.dropna(axis=0, how='all', inplace=True)
lose_rate.dropna(axis=1, how='all', inplace=True)
lose_rate.head()









    Out[11]:






  
    
      
      1
      2
      3
      4
      5
      6
      7
      8
      9
      10
      ...
      105
      106
      107
      108
      109
      110
      111
      112
      113
      114
    
  
  
    
      1
      NaN
      0.484848
      0.553191
      0.478873
      0.535032
      0.611111
      0.483607
      0.529412
      0.500000
      0.466667
      ...
      NaN
      0.517241
      0.551724
      NaN
      0.525424
      0.489583
      0.424242
      0.486111
      NaN
      0.400000
    
    
      2
      0.515152
      NaN
      0.458333
      0.652174
      0.541436
      0.595588
      0.493766
      0.511561
      0.558442
      0.526971
      ...
      0.3
      0.521739
      0.527660
      0.428571
      0.543860
      0.543689
      0.461538
      0.552941
      NaN
      0.372093
    
    
      3
      0.446809
      0.541667
      NaN
      0.380000
      0.451220
      0.471429
      0.457680
      0.517123
      0.573770
      0.427586
      ...
      NaN
      0.451613
      0.549618
      NaN
      0.485714
      0.442857
      0.472222
      0.413223
      NaN
      NaN
    
    
      4
      0.521127
      0.347826
      0.620000
      NaN
      0.560000
      0.511628
      0.503145
      0.486486
      0.523077
      0.708333
      ...
      NaN
      0.588235
      0.590909
      NaN
      0.500000
      0.578947
      0.454545
      0.500000
      NaN
      NaN
    
    
      5
      0.464968
      0.458564
      0.548780
      0.440000
      NaN
      0.474747
      0.457875
      0.495146
      0.511962
      0.446667
      ...
      NaN
      0.416667
      0.435294
      0.303030
      0.461538
      0.289157
      0.462500
      0.470588
      NaN
      0.414634
    
  

5 rows × 113 columns



In [12]:

    
n = winrate.shape[0]
# how many heroes pairs don't have enough matches to have
# meaningful winrate
n_bad_pairs = n**2 - winrate.count().sum() - (n**2 - n)/2
n_pairs = (n**2 - n)/2
print('Percent of pairs with not enough matches to count them:', 
      n_bad_pairs / n_pairs)









    



Percent of pairs with not enough matches to count them: 0.0346955796497

Building a pick

Idea: user gives 2 heroes as input, after that algorithms searches for the best next hero till there are 5 of them. The best hero would be choosen by maximazing the weight of edges in heroes graph. Heroes graph -- vertices are rows in winrate matrix and edges are winrates of heroes pairs.



In [13]:

    
def get_next_hero(pick, against=[], ban=[]):
    best_connection = -100
    next_pick = 0

    for next_hero_id in winrate.index:
        # if this hero is not in the opening
        if next_hero_id not in pick and next_hero_id not in ban \
                and next_hero_id not in against:
                
            total_connection = 0
            for picked_hero in pick:
                hero1, hero2 = sorted([next_hero_id, picked_hero])
                total_connection += winrate.loc[hero1][hero2]
                
            for enemy in against:
                total_connection -= lose_rate.loc[next_hero_id][enemy]

            if total_connection > best_connection:
                best_hero = next_hero_id
                best_connection = total_connection

    return best_hero.item()



In [25]:

    
pick = Heroes()
pick.add(Hero.from_name(''))

ban = Heroes()
ban.add(Hero.from_name('Shadow Fiend'))
ban.add(Hero.from_name('Invoker'))

against = Heroes()
against.add(Hero.from_name('Slardar'))
against.add(Hero.from_name('Witch Doctor'))

while len(pick) < 5:
    next_hero = get_next_hero(list(pick.get_ids()),
                              ban=list(ban.get_ids()),
                              against=list(against.get_ids()))
    pick.add(Hero(next_hero))
    
print(pick.get_names())









    



['Oracle', 'Juggernaut', 'Vengeful Spirit', 'Batrider', 'Rubick']

A lot of attempts to build a pick from a random hero gave me the next thought: maximum weighted winrate should be limited by some value. Because otherwise, same combinations of heroes will appear over and over again. For example, all the values in winrate matrix more than .6 should be equal to .6 or weights should be somehow. First idea really improves performance!



In [15]:

    
h1 = Hero(4)
h2 = Hero(108)
print(h1.name, h2.name)









    



Bloodseeker Underlord



In [ ]:

	2	3	4	5	6	7	8	9	10	11	...	105	106	107	108	109	110	111	112	113	114
1	212.307692	221.538462	NaN	249.230769	NaN	489.230769	73.846154	258.461538	NaN	249.230769	...	NaN	110.769231	138.461538	NaN	NaN	83.076923	126.153846	243.076923	NaN	18.461538
2	NaN	156.923077	36.923077	304.615385	264.615385	326.153846	760.000000	606.153846	347.692308	587.692308	...	NaN	280.000000	301.538462	NaN	196.923077	212.307692	227.692308	147.692308	NaN	76.923077
3	NaN	NaN	46.153846	150.769231	200.000000	286.153846	393.846154	1360.000000	113.846154	578.461538	...	NaN	443.076923	132.307692	15.384615	73.846154	67.692308	46.153846	141.538462	NaN	NaN
4	NaN	NaN	NaN	70.769231	NaN	221.538462	46.153846	86.153846	12.307692	86.153846	...	12.307692	55.384615	27.692308	NaN	NaN	46.153846	27.692308	107.692308	NaN	NaN
5	NaN	NaN	NaN	NaN	83.076923	384.615385	886.153846	553.846154	264.615385	584.615385	...	NaN	510.769231	446.153846	55.384615	224.615385	76.923077	18.461538	46.153846	NaN	209.230769

	1	2	3	4	5	6	7	8	9	10	...	105	106	107	108	109	110	111	112	113	114
1	NaN	0.484848	0.553191	0.478873	0.535032	0.611111	0.483607	0.529412	0.500000	0.466667	...	NaN	0.517241	0.551724	NaN	0.525424	0.489583	0.424242	0.486111	NaN	0.400000
2	0.515152	NaN	0.458333	0.652174	0.541436	0.595588	0.493766	0.511561	0.558442	0.526971	...	0.3	0.521739	0.527660	0.428571	0.543860	0.543689	0.461538	0.552941	NaN	0.372093
3	0.446809	0.541667	NaN	0.380000	0.451220	0.471429	0.457680	0.517123	0.573770	0.427586	...	NaN	0.451613	0.549618	NaN	0.485714	0.442857	0.472222	0.413223	NaN	NaN
4	0.521127	0.347826	0.620000	NaN	0.560000	0.511628	0.503145	0.486486	0.523077	0.708333	...	NaN	0.588235	0.590909	NaN	0.500000	0.578947	0.454545	0.500000	NaN	NaN
5	0.464968	0.458564	0.548780	0.440000	NaN	0.474747	0.457875	0.495146	0.511962	0.446667	...	NaN	0.416667	0.435294	0.303030	0.461538	0.289157	0.462500	0.470588	NaN	0.414634