In [1]:
import json
import numpy as np
import pandas as pd
from itertools import combinations, product

from atod import Hero, Heroes

In [2]:
n_heroes = 115

In [3]:
with open('data/players_in_matches.json', 'r') as fp:
    players_in_matches = json.load(fp)

In [4]:
# TODO:
# Print some info about dataset:
#  * first match date
#  * last match date
#  * number of matches

In [5]:
# TODO:
# def how_many_nans(X: pd.DataFrame) -> int:

In [6]:
matches = dict()

for record in players_in_matches:
    # create match in matches dictionary with arrays for
    # winners and losers ids
    matches.setdefault(str(record['match_id']), 
                       {
                        'winners': [],
                        'loosers': [],
                       }
                      )
    if record['win']:
        # add hero to winners of this match
        matches[str(record['match_id'])]['winners'].append(record['hero_id'])
    else:
        # add hero to losers
        matches[str(record['match_id'])]['loosers'].append(record['hero_id'])

# length of matches should be 10 times smaller than length of players...
# since there are 10 players in each match
assert len(matches), len(players_in_matches) / 10

In [7]:
# crete and fill 
# TODO: rename matrices
matches_together = np.zeros((n_heroes, n_heroes))
matches_won = np.zeros((n_heroes, n_heroes))
matches_lost = np.zeros((n_heroes, n_heroes))
matches_against = np.zeros((n_heroes, n_heroes))

for match in matches.values():
    # for winners
    # sorting is needed to have upper traingular matrix
    # combinations produces all heroes pairs with smaller id first
    for hero1, hero2 in combinations(sorted(match['winners']), 2):
        matches_together[hero1][hero2] += 1
        matches_won[hero1][hero2] += 1
        
    for hero1, hero2 in combinations(sorted(match['loosers']), 2):
        matches_together[hero1][hero2] += 1
        
    for looser, winner in product(match['loosers'], match['winners']):
        matches_against[looser][winner] += 1
        matches_against[winner][looser] += 1
        matches_lost[looser][winner] += 1

In [8]:
# minimum number of matches for pair of heroes to be included in dataset
min_matches_played = 10
max_winrate = .65
max_matches_together = max([max(a) for a in matches_together])
were_nulls = sum([a.shape[0] - np.count_nonzero(a) for a in matches_together])

# if combination of 2 heroes were used less than `min_matches` times,
# don't count their win(lose)rate (it would be NaN in result matrix)
matches_together[matches_together < min_matches_played] = np.NaN
matches_together[matches_together > max_winrate] = max_winrate
matches_against[matches_against < min_matches_played] = np.NaN

become_nulls = sum([a.shape[0] - np.count_nonzero(a) for a in matches_together])

print(become_nulls - were_nulls)


-6952
/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/ipykernel/__main__.py:10: RuntimeWarning: invalid value encountered in greater

In [9]:
# find maximum amount of matches played by 2 heroes
max_matches_played = np.nanmax([np.nanmax(hero) 
                                for hero in matches_together])

# some combinations were played more than another, so
# there is more confidence in picking this kind of heroes (tiny-wi)


/Library/Frameworks/Python.framework/Versions/3.5/lib/python3.5/site-packages/ipykernel/__main__.py:3: RuntimeWarning: All-NaN slice encountered
  app.launch_new_instance()

In [10]:
winrate_ = (matches_won / matches_together) * (1 + matches_together / max_matches_played)
winrate = pd.DataFrame(winrate_)
winrate.dropna(axis=0, how='all', inplace=True)
winrate.dropna(axis=1, how='all', inplace=True)
winrate.head()


Out[10]:
2 3 4 5 6 7 8 9 10 11 ... 105 106 107 108 109 110 111 112 113 114
1 212.307692 221.538462 NaN 249.230769 NaN 489.230769 73.846154 258.461538 NaN 249.230769 ... NaN 110.769231 138.461538 NaN NaN 83.076923 126.153846 243.076923 NaN 18.461538
2 NaN 156.923077 36.923077 304.615385 264.615385 326.153846 760.000000 606.153846 347.692308 587.692308 ... NaN 280.000000 301.538462 NaN 196.923077 212.307692 227.692308 147.692308 NaN 76.923077
3 NaN NaN 46.153846 150.769231 200.000000 286.153846 393.846154 1360.000000 113.846154 578.461538 ... NaN 443.076923 132.307692 15.384615 73.846154 67.692308 46.153846 141.538462 NaN NaN
4 NaN NaN NaN 70.769231 NaN 221.538462 46.153846 86.153846 12.307692 86.153846 ... 12.307692 55.384615 27.692308 NaN NaN 46.153846 27.692308 107.692308 NaN NaN
5 NaN NaN NaN NaN 83.076923 384.615385 886.153846 553.846154 264.615385 584.615385 ... NaN 510.769231 446.153846 55.384615 224.615385 76.923077 18.461538 46.153846 NaN 209.230769

5 rows × 112 columns


In [11]:
lose_rate_ = matches_lost / matches_against
lose_rate = pd.DataFrame(lose_rate_)
lose_rate.dropna(axis=0, how='all', inplace=True)
lose_rate.dropna(axis=1, how='all', inplace=True)
lose_rate.head()


Out[11]:
1 2 3 4 5 6 7 8 9 10 ... 105 106 107 108 109 110 111 112 113 114
1 NaN 0.484848 0.553191 0.478873 0.535032 0.611111 0.483607 0.529412 0.500000 0.466667 ... NaN 0.517241 0.551724 NaN 0.525424 0.489583 0.424242 0.486111 NaN 0.400000
2 0.515152 NaN 0.458333 0.652174 0.541436 0.595588 0.493766 0.511561 0.558442 0.526971 ... 0.3 0.521739 0.527660 0.428571 0.543860 0.543689 0.461538 0.552941 NaN 0.372093
3 0.446809 0.541667 NaN 0.380000 0.451220 0.471429 0.457680 0.517123 0.573770 0.427586 ... NaN 0.451613 0.549618 NaN 0.485714 0.442857 0.472222 0.413223 NaN NaN
4 0.521127 0.347826 0.620000 NaN 0.560000 0.511628 0.503145 0.486486 0.523077 0.708333 ... NaN 0.588235 0.590909 NaN 0.500000 0.578947 0.454545 0.500000 NaN NaN
5 0.464968 0.458564 0.548780 0.440000 NaN 0.474747 0.457875 0.495146 0.511962 0.446667 ... NaN 0.416667 0.435294 0.303030 0.461538 0.289157 0.462500 0.470588 NaN 0.414634

5 rows × 113 columns


In [12]:
n = winrate.shape[0]
# how many heroes pairs don't have enough matches to have
# meaningful winrate
n_bad_pairs = n**2 - winrate.count().sum() - (n**2 - n)/2
n_pairs = (n**2 - n)/2
print('Percent of pairs with not enough matches to count them:', 
      n_bad_pairs / n_pairs)


Percent of pairs with not enough matches to count them: 0.0346955796497

Building a pick

Idea: user gives 2 heroes as input, after that algorithms searches for the best next hero till there are 5 of them. The best hero would be choosen by maximazing the weight of edges in heroes graph. Heroes graph -- vertices are rows in winrate matrix and edges are winrates of heroes pairs.


In [13]:
def get_next_hero(pick, against=[], ban=[]):
    best_connection = -100
    next_pick = 0

    for next_hero_id in winrate.index:
        # if this hero is not in the opening
        if next_hero_id not in pick and next_hero_id not in ban \
                and next_hero_id not in against:
                
            total_connection = 0
            for picked_hero in pick:
                hero1, hero2 = sorted([next_hero_id, picked_hero])
                total_connection += winrate.loc[hero1][hero2]
                
            for enemy in against:
                total_connection -= lose_rate.loc[next_hero_id][enemy]

            if total_connection > best_connection:
                best_hero = next_hero_id
                best_connection = total_connection

    return best_hero.item()

In [25]:
pick = Heroes()
pick.add(Hero.from_name(''))

ban = Heroes()
ban.add(Hero.from_name('Shadow Fiend'))
ban.add(Hero.from_name('Invoker'))

against = Heroes()
against.add(Hero.from_name('Slardar'))
against.add(Hero.from_name('Witch Doctor'))

while len(pick) < 5:
    next_hero = get_next_hero(list(pick.get_ids()),
                              ban=list(ban.get_ids()),
                              against=list(against.get_ids()))
    pick.add(Hero(next_hero))
    
print(pick.get_names())


['Oracle', 'Juggernaut', 'Vengeful Spirit', 'Batrider', 'Rubick']

A lot of attempts to build a pick from a random hero gave me the next thought: maximum weighted winrate should be limited by some value. Because otherwise, same combinations of heroes will appear over and over again. For example, all the values in winrate matrix more than .6 should be equal to .6 or weights should be somehow. First idea really improves performance!


In [15]:
h1 = Hero(4)
h2 = Hero(108)
print(h1.name, h2.name)


Bloodseeker Underlord

In [ ]: