In [1]:
%matplotlib inline

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import glob
import datetime
import itertools
from time import sleep
import os
import seaborn as sns
from itertools import chain

In [2]:
print datetime.datetime.now()
validFilePaths = []
for f in os.listdir("data/anomaly_data"):
    filePath = os.path.join("data/anomaly_data", f)
    if os.path.isdir(filePath):
        continue
    if os.stat(filePath).st_size <= 3:
        continue
    validFilePaths.append(filePath)
    
numF = int(1 * len(validFilePaths))
print 'Using this many files {0}'.format(numF)
validFilePaths = np.random.choice(validFilePaths, numF, replace=False)
df_list = (pd.read_csv(f) for f in validFilePaths)
df = pd.concat(df_list, ignore_index=True)
df = df[df['radiant_win'].notnull()]


2017-06-20 21:51:22.869660
Using this many files 4088

In [3]:
df2 = pd.DataFrame(np.loadtxt('data/goodAnomaliesResidual.csv', delimiter = ','), 
                 columns = ["match_id", "residual"])

df = df.merge(df2, left_on='match_id', right_on='match_id')

matchDictionary = {}
heroColumns = [filter(lambda x: z in x, df.columns) for z in ['hero_id']][0]

# for each row, this function will return all the heroes that the match belongs in
# for example, if the match had an AM and Spectre, we should place this match in each of those bags
def heroesForMatch(row):
    pass

In [4]:
df = df[~df['match_id'].duplicated()]

In [5]:
df = df.sort_values(by='residual', ascending=False)

In [6]:
columns = df.columns
numericalFeatures = ['match_id', 'positive_votes', 'negative_votes', 'first_blood_time', 'radiant_win',
                    'duration', 'kills', 'deaths', 'assists', 'kpm', 'kda', 'hero_dmg',
                    'gpm', 'hero_heal', 'xpm', 'totalgold', 'totalxp', 'lasthits', 'denies',
                    'tower_kills', 'courier_kills', 'observer_uses', 'sentry_uses',
                    'ancient_kills', 'camps_stacked', 'abandons', 'residual']
numFeatures = [filter(lambda x: z in x, columns) for z in numericalFeatures]
numFeatures = list(chain(*numFeatures))
df = df[numFeatures]

In [7]:
df['match_id'].values.tolist()


Out[7]:
[3215497967,
 3215097220,
 3215312675,
 3215162338,
 3215181105,
 3215377994,
 3215271631,
 3215289100,
 3215279863,
 3215253055,
 3215448302,
 3215416370,
 3215261563,
 3215294948,
 3215155134,
 3215350255,
 3215341221,
 3215306187,
 3215302636,
 3215316820,
 3215190109,
 3215355847,
 3215130854,
 3215259897,
 3215394671,
 3215152225,
 3215274711,
 3215366609,
 3215449992,
 3215376405,
 3215268540,
 3215420902,
 3215332931,
 3215327547,
 3215354992,
 3215254939,
 3215149372,
 3215370025,
 3215201012,
 3215272070,
 3215223639,
 3215422492,
 3215260456,
 3215354934,
 3215377592,
 3215445972,
 3215101148,
 3215174827,
 3215384305,
 3215188370,
 3215297782,
 3215493515,
 3215119808,
 3215360521,
 3215102154,
 3215271055,
 3215403667,
 3215217460,
 3215482564,
 3215466626,
 3215255292,
 3215095789,
 3215108689,
 3215330831,
 3215468550,
 3215446132,
 3215384555,
 3215470445,
 3215318005,
 3215238829,
 3215293087,
 3215391855,
 3215439074,
 3215401870,
 3215440922,
 3215092973,
 3215369622,
 3215116689,
 3215236348,
 3215145375,
 3215129374,
 3215199686,
 3215481084,
 3215191852,
 3215459217,
 3215189407,
 3215462703,
 3215113055,
 3215469573,
 3215428974,
 3215323020,
 3215169878,
 3215359797,
 3215419225,
 3215221028,
 3215488213,
 3215198158,
 3215275453,
 3215204587,
 3215260718,
 3215153289,
 3215120631,
 3215400918,
 3215119120,
 3215419414,
 3215443584,
 3215276839,
 3215464545,
 3215118187,
 3215247802,
 3215343954,
 3215145695,
 3215365977,
 3215162352,
 3215344482,
 3215166724,
 3215457284,
 3215144772,
 3215254006,
 3215471576,
 3215267691,
 3215203102,
 3215460086,
 3215132768,
 3215444643,
 3215327292,
 3215107898,
 3215193832,
 3215135123,
 3215361786,
 3215242906,
 3215404311,
 3215472295,
 3215359384,
 3215261245,
 3215419991,
 3215164884,
 3215482334,
 3215344415,
 3215191160,
 3215269902,
 3215132609,
 3215229099,
 3215424121,
 3215214645,
 3215280848,
 3215324165,
 3215366819,
 3215121935,
 3215217456,
 3215478948,
 3215287068,
 3215328836,
 3215189005,
 3215180352,
 3215267872,
 3215325036,
 3215403728,
 3215386000,
 3215354548,
 3215107983,
 3215193319,
 3215174289,
 3215405510,
 3215486107,
 3215308046,
 3215375609,
 3215370507,
 3215356164,
 3215394202,
 3215475044,
 3215314163,
 3215232157,
 3215449600,
 3215150608,
 3215387413,
 3215116601,
 3215476161,
 3215498524,
 3215223648,
 3215347649,
 3215288708,
 3215250234,
 3215123893,
 3215102237,
 3215294572,
 3215169250,
 3215362362,
 3215401727,
 3215453213,
 3215312256,
 3215332342,
 3215312958,
 3215223737,
 3215285061,
 3215413686,
 3215324947,
 3215327320,
 3215267032,
 3215454284,
 3215357642,
 3215488755,
 3215224918,
 3215115434,
 3215486952,
 3215459746,
 3215330354,
 3215345885,
 3215129922,
 3215300804,
 3215475617,
 3215272277,
 3215423386,
 3215130289,
 3215483739,
 3215153586,
 3215135265,
 3215299981,
 3215316700,
 3215448897,
 3215206674,
 3215475240,
 3215411914,
 3215494824,
 3215208665,
 3215390160,
 3215251697,
 3215466606,
 3215391093,
 3215162013,
 3215299806,
 3215321270,
 3215287399,
 3215375892,
 3215195313,
 3215144420,
 3215379626,
 3215137884,
 3215375087,
 3215303959,
 3215437756,
 3215336973,
 3215416702,
 3215205916,
 3215220146,
 3215440465,
 3215270302,
 3215094959,
 3215298099,
 3215165027,
 3215433320,
 3215298273,
 3215455368,
 3215363503,
 3215100697,
 3215283180,
 3215410568,
 3215291565,
 3215385605,
 3215288289,
 3215148792,
 3215094660,
 3215347239,
 3215352617,
 3215107489,
 3215474947,
 3215404711,
 3215147040,
 3215180855,
 3215286517,
 3215308199,
 3215399746,
 3215142113,
 3215138351,
 3215276184,
 3215233902,
 3215292517,
 3215166848,
 3215477347,
 3215246062,
 3215136191,
 3215281167,
 3215459906,
 3215480662,
 3215287547,
 3215337213,
 3215276888,
 3215228456,
 3215282906,
 3215354545,
 3215243693,
 3215131987,
 3215220488,
 3215496947,
 3215129208,
 3215335248,
 3215477407,
 3215386429,
 3215189721,
 3215355241,
 3215114144,
 3215446878,
 3215138380,
 3215391557,
 3215452559,
 3215388921,
 3215457606,
 3215245376,
 3215182714,
 3215425334,
 3215146405,
 3215303729,
 3215297974,
 3215488250,
 3215291673,
 3215165758,
 3215215978,
 3215226575,
 3215426458,
 3215298264,
 3215375401,
 3215164902,
 3215439378,
 3215417419,
 3215129654,
 3215412289,
 3215119017,
 3215151576,
 3215363364,
 3215232872,
 3215100187,
 3215311956,
 3215379763,
 3215351593,
 3215409860,
 3215442394,
 3215353990,
 3215277734,
 3215229457,
 3215130835,
 3215092287,
 3215137548,
 3215410873,
 3215186260,
 3215314246,
 3215455747,
 3215125088,
 3215388049,
 3215387904,
 3215486254,
 3215349760,
 3215149581,
 3215325668,
 3215374088,
 3215303752,
 3215266899,
 3215095314,
 3215216831,
 3215222186,
 3215164824,
 3215310710,
 3215266966,
 3215415225,
 3215265780,
 3215418853,
 3215273920,
 3215171733,
 3215236033,
 3215274732,
 3215282518,
 3215365416,
 3215485464,
 3215489360,
 3215270915,
 3215306738,
 3215260577,
 3215141466,
 3215162054,
 3215395730,
 3215265050,
 3215312365,
 3215397328,
 3215110538,
 3215214818,
 3215435057,
 3215154136,
 3215356322,
 3215126746,
 3215249783,
 3215393413,
 3215163349,
 3215282633,
 3215152262,
 3215121064,
 3215328313,
 3215209313,
 3215435386,
 3215486232,
 3215337531,
 3215102476,
 3215253869,
 3215422166,
 3215263577,
 3215285461,
 3215114749,
 3215096854,
 3215446534,
 3215119491,
 3215205453,
 3215135325,
 3215415463,
 3215322189,
 3215345510,
 3215138506,
 3215492929,
 3215267165,
 3215123467,
 3215358156,
 3215094142,
 3215328883,
 3215250045,
 3215410384,
 3215168749,
 3215343292,
 3215107300,
 3215450701,
 3215239658,
 3215427391,
 3215403005,
 3215129173,
 3215301736,
 3215121346,
 3215136149,
 3215315656,
 3215435380,
 3215260528,
 3215131741,
 3215127161,
 3215352898,
 3215115754,
 3215383518,
 3215323457,
 3215465807,
 3215145418,
 3215380602,
 3215391171,
 3215191784,
 3215358722,
 3215433160,
 3215290038,
 3215193538,
 3215209365,
 3215253420,
 3215491015,
 3215217135,
 3215379611,
 3215413988,
 3215253204,
 3215404742,
 3215384989,
 3215199127,
 3215120092,
 3215108762,
 3215213377,
 3215349948,
 3215306421,
 3215202477,
 3215464043,
 3215268057,
 3215370518,
 3215439438,
 3215371756,
 3215477162,
 3215293625,
 3215296744,
 3215377187,
 3215251365,
 3215263061,
 3215358458,
 3215429839,
 3215209805,
 3215279179,
 3215405966,
 3215258832,
 3215179518,
 3215104961,
 3215107873,
 3215366168,
 3215430628,
 3215422313,
 3215365394,
 3215485880,
 3215457207,
 3215380931,
 3215445032,
 3215490011,
 3215137211,
 3215203727,
 3215165184,
 3215489708,
 3215452967,
 3215329371,
 3215127826,
 3215406396,
 3215230963,
 3215197322,
 3215109921,
 3215336230,
 3215447774,
 3215473864,
 3215130557,
 3215267128,
 3215317064,
 3215407823,
 3215334574,
 3215235851,
 3215432287,
 3215279599,
 3215277575,
 3215352751,
 3215193905,
 3215403773,
 3215470994,
 3215241393,
 3215475777,
 3215378128,
 3215255293,
 3215408793,
 3215186419,
 3215365233,
 3215319925,
 3215206639,
 3215101069,
 3215110880,
 3215279096,
 3215236308,
 3215381288,
 3215292947,
 3215199673,
 3215409332,
 3215099900,
 3215319240,
 3215115194,
 3215341642,
 3215288694,
 3215263248,
 3215248395,
 3215424333,
 3215388328,
 3215153855,
 3215113703,
 3215378253,
 3215478594,
 3215333633,
 3215314457,
 3215264390,
 3215185323,
 3215416462,
 3215294519,
 3215100773,
 3215419450,
 3215149559,
 3215207589,
 3215443285,
 3215294238,
 3215230801,
 3215420148,
 3215412427,
 3215231453,
 3215373592,
 3215246152,
 3215408093,
 3215095140,
 3215231193,
 3215402750,
 3215338649,
 3215453826,
 3215184198,
 3215315286,
 3215370517,
 3215360704,
 3215172975,
 3215131925,
 3215134357,
 3215311164,
 3215452739,
 3215393161,
 3215293239,
 3215114502,
 3215224556,
 3215273884,
 3215363634,
 3215460930,
 3215487728,
 3215273633,
 3215360865,
 3215431682,
 3215109989,
 3215237594,
 3215269721,
 3215094857,
 3215383204,
 3215374815,
 3215128099,
 3215241740,
 3215344197,
 3215303641,
 3215156007,
 3215091817,
 3215275030,
 3215389491,
 3215141012,
 3215369987,
 3215215211,
 3215231482,
 3215237574,
 3215368109,
 3215428944,
 3215401494,
 3215236425,
 3215453523,
 3215429486,
 3215325280,
 3215195387,
 3215351934,
 3215360397,
 3215281754,
 3215188607,
 3215248934,
 3215379022,
 3215248543,
 3215204076,
 3215469804,
 3215228885,
 3215142848,
 3215429599,
 3215271563,
 3215123800,
 3215141137,
 3215221304,
 3215146919,
 3215138954,
 3215226005,
 3215418104,
 3215485789,
 3215364262,
 3215384549,
 3215354207,
 3215221726,
 3215397936,
 3215253751,
 3215335055,
 3215178825,
 3215351093,
 3215263210,
 3215311818,
 3215111810,
 3215195203,
 3215457131,
 3215224003,
 3215323074,
 3215256552,
 3215290260,
 3215100967,
 3215406255,
 3215317707,
 3215471566,
 3215299694,
 3215334730,
 3215233853,
 3215233472,
 3215139044,
 3215385425,
 3215242754,
 3215168054,
 3215470801,
 3215290794,
 3215202214,
 3215220375,
 3215139027,
 3215158933,
 3215429365,
 3215249296,
 3215412550,
 3215158795,
 3215428180,
 3215248555,
 3215092943,
 3215477813,
 3215357991,
 3215121936,
 3215451491,
 3215179481,
 3215339823,
 3215213742,
 3215291849,
 3215359386,
 3215463125,
 3215320771,
 3215156866,
 3215146289,
 3215436023,
 3215120403,
 3215356956,
 3215349109,
 3215293231,
 3215295277,
 3215332926,
 3215468557,
 3215099428,
 3215320538,
 3215097367,
 3215410767,
 3215319194,
 3215189993,
 3215450560,
 3215415472,
 3215473056,
 3215218922,
 3215398154,
 3215196752,
 3215116970,
 3215154857,
 3215378775,
 3215422840,
 3215438030,
 3215467864,
 3215148515,
 3215127261,
 3215403415,
 3215371556,
 3215373819,
 3215289869,
 3215261385,
 3215113646,
 3215093449,
 3215397566,
 3215394731,
 3215202644,
 3215122425,
 3215232510,
 3215376702,
 3215271022,
 3215410326,
 3215238449,
 3215219242,
 3215314765,
 3215415407,
 3215454340,
 3215159145,
 3215496824,
 3215153285,
 3215421241,
 3215158522,
 3215285625,
 3215409861,
 3215146832,
 3215350288,
 3215138718,
 3215216041,
 3215430085,
 3215162220,
 3215495154,
 3215260875,
 3215240410,
 3215315676,
 3215259776,
 3215322753,
 3215232566,
 3215155195,
 3215219753,
 3215153655,
 3215443368,
 3215100159,
 3215362400,
 3215227158,
 3215114738,
 3215377124,
 3215387761,
 3215490323,
 3215224622,
 3215465088,
 3215280953,
 3215356704,
 3215432469,
 3215209576,
 3215320141,
 3215150494,
 3215334861,
 3215168791,
 3215310210,
 3215460927,
 3215440346,
 3215470802,
 3215129573,
 3215453894,
 3215327057,
 3215229668,
 3215248682,
 3215159362,
 3215139650,
 3215291485,
 3215450401,
 3215217478,
 3215373808,
 3215293059,
 3215134209,
 3215205881,
 3215303809,
 3215264353,
 3215268085,
 3215283260,
 3215344640,
 3215184925,
 3215335004,
 3215168443,
 3215193183,
 3215278547,
 3215183829,
 3215184150,
 3215163824,
 3215363725,
 3215404047,
 3215237753,
 3215487652,
 3215429858,
 3215166954,
 3215133584,
 3215178851,
 3215448807,
 3215273126,
 3215168647,
 3215236617,
 3215250424,
 3215435433,
 3215106537,
 3215425535,
 3215403559,
 3215279143,
 3215273425,
 3215236699,
 3215420829,
 3215249423,
 3215204448,
 3215354835,
 3215110230,
 3215354404,
 3215269421,
 3215475433,
 3215122079,
 3215120445,
 3215483256,
 3215270003,
 3215239180,
 3215449129,
 3215466202,
 3215446800,
 3215404824,
 3215216964,
 3215292094,
 3215244813,
 3215257022,
 3215138667,
 3215116447,
 3215186293,
 3215168235,
 3215324033,
 3215203845,
 3215198821,
 3215303652,
 3215328046,
 3215373453,
 3215449613,
 3215349133,
 3215230728,
 3215465483,
 3215360232,
 3215242866,
 3215186737,
 3215303349,
 3215229664,
 3215358878,
 3215290969,
 3215115985,
 3215322709,
 3215364163,
 3215160302,
 3215280768,
 3215296742,
 3215491599,
 3215105569,
 3215125998,
 3215160079,
 3215387762,
 3215469210,
 3215489589,
 3215101312,
 3215443055,
 3215355623,
 3215140780,
 3215091713,
 3215246979,
 3215198239,
 3215464565,
 3215210891,
 3215418615,
 3215189899,
 3215408690,
 3215398951,
 3215107442,
 3215379218,
 3215114940,
 3215131372,
 3215429005,
 3215228880,
 3215464416,
 3215268798,
 3215197421,
 3215230662,
 3215367208,
 3215416423,
 3215233122,
 3215176893,
 3215128471,
 3215220038,
 3215316757,
 3215470163,
 3215130943,
 3215468280,
 3215196602,
 3215426455,
 3215350260,
 3215192607,
 3215232226,
 3215162029,
 3215244712,
 3215466440,
 3215199992,
 3215482756,
 3215271541,
 3215195629,
 3215270783,
 3215259663,
 3215436836,
 3215256304,
 3215491011,
 3215381245,
 3215359319,
 3215222660,
 3215449248,
 3215110552,
 3215117067,
 3215374344,
 3215358157,
 3215287506,
 3215458174,
 3215110350,
 3215113122,
 3215485265,
 3215374250,
 3215113325,
 3215265759,
 3215322186,
 3215265172,
 3215421174,
 3215379819,
 3215093702,
 3215096117,
 3215492042,
 3215326780,
 3215223250,
 3215335663,
 3215118408,
 3215090968,
 3215420674,
 3215379283,
 3215242491,
 3215124675,
 3215494997,
 3215356775,
 3215244219,
 3215254617,
 3215232682,
 3215208828,
 3215145084,
 3215358667,
 3215447392,
 3215289158,
 3215161481,
 3215132824,
 3215173685,
 3215376877,
 3215336686,
 3215149242,
 3215308356,
 3215463785,
 3215143402,
 3215347623,
 3215123239,
 3215205312,
 ...]

In [8]:
df['residual'].values.tolist()


Out[8]:
[60.09383010864258,
 22.98058319091797,
 14.018461227416992,
 12.276541709899902,
 10.960655212402344,
 10.190336227416992,
 9.797115325927734,
 8.665862083435059,
 8.291083335876465,
 6.753566265106201,
 5.982767105102539,
 5.009283542633057,
 4.787799835205078,
 4.737585544586182,
 4.151325702667236,
 3.8586721420288086,
 3.7200565338134766,
 3.653451919555664,
 3.6120126247406006,
 3.55796217918396,
 3.5249321460723877,
 3.4550528526306152,
 3.4301578998565674,
 2.9261271953582764,
 2.8132457733154297,
 2.767648220062256,
 2.758488178253174,
 2.543581247329712,
 2.5158803462982178,
 2.4831578731536865,
 2.4762089252471924,
 2.392141342163086,
 2.38230299949646,
 2.3297765254974365,
 2.2398288249969482,
 2.229811429977417,
 2.1894421577453613,
 2.1700241565704346,
 2.150897741317749,
 2.0817346572875977,
 2.0602142810821533,
 2.044538974761963,
 2.038116693496704,
 2.0010886192321777,
 1.953024983406067,
 1.9425238370895386,
 1.919196367263794,
 1.8698474168777466,
 1.8231494426727295,
 1.7869755029678345,
 1.7188692092895508,
 1.7181131839752197,
 1.7047621011734009,
 1.6976909637451172,
 1.691680669784546,
 1.6713063716888428,
 1.6653168201446533,
 1.6202446222305298,
 1.6132309436798096,
 1.6028668880462646,
 1.581583023071289,
 1.5775842666625977,
 1.5626850128173828,
 1.5582425594329834,
 1.5492855310440063,
 1.5385805368423462,
 1.5317155122756958,
 1.5211113691329956,
 1.5076344013214111,
 1.4967833757400513,
 1.4829853773117065,
 1.4817631244659424,
 1.4691892862319946,
 1.4565132856369019,
 1.4442218542099,
 1.4434107542037964,
 1.4333841800689697,
 1.4322049617767334,
 1.427970290184021,
 1.424288272857666,
 1.4241111278533936,
 1.4210336208343506,
 1.4192612171173096,
 1.4180662631988525,
 1.418045997619629,
 1.4131355285644531,
 1.3996341228485107,
 1.3977913856506348,
 1.392609715461731,
 1.3883095979690552,
 1.382023572921753,
 1.3814785480499268,
 1.3794400691986084,
 1.364009976387024,
 1.3631293773651123,
 1.3609627485275269,
 1.358451247215271,
 1.3547123670578003,
 1.3397060632705688,
 1.3350133895874023,
 1.3328020572662354,
 1.3265687227249146,
 1.3234529495239258,
 1.3199751377105713,
 1.2998747825622559,
 1.297800898551941,
 1.2892454862594604,
 1.2862396240234375,
 1.283833622932434,
 1.2751528024673462,
 1.2714056968688965,
 1.2680243253707886,
 1.2384188175201416,
 1.2283591032028198,
 1.222191572189331,
 1.2142605781555176,
 1.206538200378418,
 1.2041807174682617,
 1.1953727006912231,
 1.1952720880508423,
 1.182854175567627,
 1.181726336479187,
 1.179439902305603,
 1.171888828277588,
 1.1665818691253662,
 1.16591215133667,
 1.162890076637268,
 1.1562278270721436,
 1.1520692110061646,
 1.144974946975708,
 1.1360812187194824,
 1.1236978769302368,
 1.120529055595398,
 1.118119239807129,
 1.117451548576355,
 1.1165120601654053,
 1.1132597923278809,
 1.1122854948043823,
 1.1096220016479492,
 1.1047585010528564,
 1.1041306257247925,
 1.1024683713912964,
 1.1014271974563599,
 1.0916112661361694,
 1.0913853645324707,
 1.0900408029556274,
 1.084211826324463,
 1.083559513092041,
 1.0815109014511108,
 1.080784797668457,
 1.079131007194519,
 1.0745054483413696,
 1.0729814767837524,
 1.072069764137268,
 1.0666135549545288,
 1.064724326133728,
 1.0637664794921875,
 1.0632541179656982,
 1.0616685152053833,
 1.058560848236084,
 1.051201343536377,
 1.0496363639831543,
 1.0495151281356812,
 1.049206018447876,
 1.0463898181915283,
 1.0434927940368652,
 1.0426044464111328,
 1.0395288467407227,
 1.0386379957199097,
 1.0352888107299805,
 1.0337138175964355,
 1.033097743988037,
 1.032279133796692,
 1.0288360118865967,
 1.0225169658660889,
 1.0223387479782104,
 1.0201956033706665,
 1.0195045471191406,
 1.0138232707977295,
 1.0096408128738403,
 1.0063974857330322,
 1.0060631036758423,
 1.0054899454116821,
 1.0046826601028442,
 1.0020874738693237,
 1.0011916160583496,
 0.999028205871582,
 0.9979208707809448,
 0.9951856136322021,
 0.9943534135818481,
 0.9923041462898254,
 0.9922865629196167,
 0.9902551770210266,
 0.9878281354904175,
 0.9870291948318481,
 0.9827179312705994,
 0.9821138381958008,
 0.9803048968315125,
 0.9795767068862915,
 0.9786282777786255,
 0.9723061919212341,
 0.970488965511322,
 0.9695155620574951,
 0.9688208103179932,
 0.9679350256919861,
 0.9669782519340515,
 0.9644799828529358,
 0.9643834233283997,
 0.9639016389846802,
 0.9597914218902588,
 0.9596630334854126,
 0.9576871991157532,
 0.9555385112762451,
 0.9550148248672485,
 0.9534093141555786,
 0.9518917202949524,
 0.9480692744255066,
 0.945451021194458,
 0.9449933171272278,
 0.9447908401489258,
 0.9431512951850891,
 0.9390014410018921,
 0.9389370083808899,
 0.9385173320770264,
 0.9377952814102173,
 0.9375945329666138,
 0.936521053314209,
 0.9340055584907532,
 0.9331918358802795,
 0.9330150485038757,
 0.9317139983177185,
 0.9304264783859253,
 0.9288665056228638,
 0.9260517358779907,
 0.9249129891395569,
 0.9222087264060974,
 0.9214598536491394,
 0.9209437966346741,
 0.9209313988685608,
 0.9208395481109619,
 0.9197496175765991,
 0.9185452461242676,
 0.9154036641120911,
 0.9132921695709229,
 0.913219690322876,
 0.9126948118209839,
 0.9088218212127686,
 0.9086437821388245,
 0.9084234237670898,
 0.9055785536766052,
 0.9055338501930237,
 0.9030809998512268,
 0.9009914398193359,
 0.8999032974243164,
 0.899492621421814,
 0.8974443078041077,
 0.8955872058868408,
 0.8940939903259277,
 0.8897515535354614,
 0.8891883492469788,
 0.8884326219558716,
 0.8873404860496521,
 0.8867751955986023,
 0.8864952921867371,
 0.886139452457428,
 0.8853210210800171,
 0.8835608959197998,
 0.8831455707550049,
 0.8814792633056641,
 0.8786903023719788,
 0.8786889314651489,
 0.8786466121673584,
 0.878573477268219,
 0.876968264579773,
 0.8747633695602417,
 0.8735519647598267,
 0.8730534911155701,
 0.8729946613311768,
 0.8721534609794617,
 0.8719340562820435,
 0.8715412616729736,
 0.8697844743728638,
 0.8684020638465881,
 0.8658937811851501,
 0.8647394180297852,
 0.8627612590789795,
 0.8623327016830444,
 0.8616533875465393,
 0.8615354299545288,
 0.8612295389175415,
 0.8595142364501953,
 0.8593857288360596,
 0.8583488464355469,
 0.8570705652236938,
 0.8564101457595825,
 0.8563953042030334,
 0.8560832738876343,
 0.8552137017250061,
 0.8547729253768921,
 0.8529301881790161,
 0.851856529712677,
 0.8514204025268555,
 0.8499778509140015,
 0.8469444513320923,
 0.8467068672180176,
 0.846032440662384,
 0.845173716545105,
 0.8448122143745422,
 0.8432825803756714,
 0.8417072296142578,
 0.8406412601470947,
 0.8391038179397583,
 0.8389773368835449,
 0.8380493521690369,
 0.8373729586601257,
 0.8368411064147949,
 0.836216926574707,
 0.8358647227287292,
 0.8336787223815918,
 0.8333789110183716,
 0.8329710364341736,
 0.8316418528556824,
 0.8302108645439148,
 0.830048143863678,
 0.8297022581100464,
 0.8295499682426453,
 0.8286826014518738,
 0.8277784585952759,
 0.827299177646637,
 0.8271003365516663,
 0.8269991278648376,
 0.8262494802474976,
 0.8259848952293396,
 0.8258390426635742,
 0.8255735635757446,
 0.8250579833984375,
 0.8247339129447937,
 0.8241198658943176,
 0.8225634694099426,
 0.8194668889045715,
 0.8185660243034363,
 0.8180004358291626,
 0.8172050714492798,
 0.8170410990715027,
 0.8164354562759399,
 0.8154846429824829,
 0.8146878480911255,
 0.8123286962509155,
 0.8110998868942261,
 0.8109608292579651,
 0.810543417930603,
 0.8094967007637024,
 0.8084927797317505,
 0.8075140118598938,
 0.8060798048973083,
 0.8060481548309326,
 0.8048883676528931,
 0.8044706583023071,
 0.8039005994796753,
 0.8037306070327759,
 0.8031567335128784,
 0.8023133277893066,
 0.7998040914535522,
 0.7996733784675598,
 0.7978358864784241,
 0.7977395057678223,
 0.7967928051948547,
 0.7952989935874939,
 0.7938899993896484,
 0.7934744358062744,
 0.7932054400444031,
 0.792657196521759,
 0.792655348777771,
 0.7925220727920532,
 0.7915612459182739,
 0.7906990051269531,
 0.7903063893318176,
 0.7900872230529785,
 0.7893491983413696,
 0.7878237962722778,
 0.7857347726821899,
 0.7855825424194336,
 0.7842476963996887,
 0.7831416130065918,
 0.7821829915046692,
 0.7811717391014099,
 0.7788980007171631,
 0.7783818244934082,
 0.7776341438293457,
 0.7757152915000916,
 0.7753698825836182,
 0.7746968865394592,
 0.7746821045875549,
 0.7739043831825256,
 0.7726438641548157,
 0.7714319229125977,
 0.770415723323822,
 0.7700993418693542,
 0.7694905996322632,
 0.7671429514884949,
 0.7669565677642822,
 0.7655788660049438,
 0.765575647354126,
 0.7641459107398987,
 0.7632062435150146,
 0.7624366879463196,
 0.7618311047554016,
 0.761588990688324,
 0.7593628168106079,
 0.7590985894203186,
 0.7585266828536987,
 0.7580933570861816,
 0.7577105760574341,
 0.7575187087059021,
 0.7572482824325562,
 0.7569153308868408,
 0.7563158273696899,
 0.7544280290603638,
 0.7543737888336182,
 0.7539798021316528,
 0.7536249160766602,
 0.7536008954048157,
 0.7532666325569153,
 0.7531739473342896,
 0.752880871295929,
 0.7528793811798096,
 0.7526946067810059,
 0.751673698425293,
 0.7499583959579468,
 0.7496762275695801,
 0.7489606142044067,
 0.7485086917877197,
 0.7477176785469055,
 0.7476752400398254,
 0.7472490668296814,
 0.7472139596939087,
 0.7466698884963989,
 0.7460474967956543,
 0.7460435628890991,
 0.744686484336853,
 0.7445034384727478,
 0.7432817220687866,
 0.7422952055931091,
 0.7414557337760925,
 0.7412257194519043,
 0.7403615713119507,
 0.7403483986854553,
 0.7383480072021484,
 0.7383477091789246,
 0.7380225658416748,
 0.7366001009941101,
 0.7361823320388794,
 0.7343788743019104,
 0.733960747718811,
 0.732728123664856,
 0.7322820425033569,
 0.7319163680076599,
 0.7304933071136475,
 0.730053722858429,
 0.7288756966590881,
 0.7281818985939026,
 0.7281191945075989,
 0.727988064289093,
 0.7277131676673889,
 0.726827085018158,
 0.7263097763061523,
 0.7255817651748657,
 0.7255740761756897,
 0.7253108024597168,
 0.7246365547180176,
 0.7239059209823608,
 0.7233874797821045,
 0.7230355143547058,
 0.723025918006897,
 0.7225368022918701,
 0.7217145562171936,
 0.7210047841072083,
 0.7206993699073792,
 0.7205953001976013,
 0.7205812335014343,
 0.719957172870636,
 0.7198128700256348,
 0.7192848324775696,
 0.7190936803817749,
 0.7181803584098816,
 0.7170781493186951,
 0.7161182165145874,
 0.7156762480735779,
 0.7153546810150146,
 0.7148224711418152,
 0.7141834497451782,
 0.7140740752220154,
 0.7137510776519775,
 0.7134366035461426,
 0.7130857706069946,
 0.7130664587020874,
 0.7126566767692566,
 0.7121343612670898,
 0.7115955352783203,
 0.7110877633094788,
 0.7108469009399414,
 0.7104701399803162,
 0.7101932168006897,
 0.7093899846076965,
 0.7090077996253967,
 0.7088366150856018,
 0.7085787653923035,
 0.7084393501281738,
 0.7083696126937866,
 0.7082640528678894,
 0.7068285942077637,
 0.7065154314041138,
 0.7061051726341248,
 0.7060731053352356,
 0.706041693687439,
 0.7057188153266907,
 0.704667866230011,
 0.7045057415962219,
 0.7043561339378357,
 0.7041466236114502,
 0.7040076851844788,
 0.7035585641860962,
 0.7030852437019348,
 0.7029399871826172,
 0.7028341293334961,
 0.7018325328826904,
 0.7014175057411194,
 0.7009782791137695,
 0.7006656527519226,
 0.6993235945701599,
 0.6986374855041504,
 0.6984686255455017,
 0.6980193257331848,
 0.6978328227996826,
 0.6967384219169617,
 0.6964960694313049,
 0.6964261531829834,
 0.6963183879852295,
 0.6954740881919861,
 0.6952798366546631,
 0.695203423500061,
 0.6944896578788757,
 0.6942644119262695,
 0.6942504644393921,
 0.6940212249755859,
 0.693926990032196,
 0.693915843963623,
 0.69270259141922,
 0.6923828721046448,
 0.6921383738517761,
 0.6912344694137573,
 0.6909181475639343,
 0.6908226013183594,
 0.6903275847434998,
 0.6885342001914978,
 0.6880930662155151,
 0.687547504901886,
 0.6873355507850647,
 0.6869430541992188,
 0.6868414878845215,
 0.6866506934165955,
 0.6856019496917725,
 0.6853703260421753,
 0.684950053691864,
 0.6844768524169922,
 0.6843836903572083,
 0.6843326687812805,
 0.6840593218803406,
 0.6839963793754578,
 0.6833631992340088,
 0.6820196509361267,
 0.6815665364265442,
 0.6803432703018188,
 0.6799181699752808,
 0.679553210735321,
 0.678919792175293,
 0.6787182688713074,
 0.6786744594573975,
 0.6782424449920654,
 0.6780423521995544,
 0.6777185201644897,
 0.6777063608169556,
 0.6776937246322632,
 0.6772192120552063,
 0.6769409775733948,
 0.6760160326957703,
 0.6759694814682007,
 0.6755587458610535,
 0.675423800945282,
 0.675076961517334,
 0.6750593781471252,
 0.6749622821807861,
 0.674652636051178,
 0.674209713935852,
 0.6741954684257507,
 0.6737287640571594,
 0.6736929416656494,
 0.6729382276535034,
 0.6728609204292297,
 0.6727287173271179,
 0.6725167632102966,
 0.6722366809844971,
 0.6722153425216675,
 0.6721881628036499,
 0.6720960140228271,
 0.671940267086029,
 0.6713663339614868,
 0.6698254346847534,
 0.6695573329925537,
 0.6687107682228088,
 0.6684702634811401,
 0.6678615808486938,
 0.6676300764083862,
 0.6671851277351379,
 0.6671191453933716,
 0.6668809652328491,
 0.6667686700820923,
 0.6666104197502136,
 0.6665686368942261,
 0.6664994955062866,
 0.6663007736206055,
 0.6657471656799316,
 0.6657465696334839,
 0.6657367944717407,
 0.6655275225639343,
 0.6652972102165222,
 0.6651871204376221,
 0.6651492118835449,
 0.6650216579437256,
 0.6648292541503906,
 0.6644583940505981,
 0.6641172766685486,
 0.6637612581253052,
 0.6635695099830627,
 0.6633908748626709,
 0.6629290580749512,
 0.6624594330787659,
 0.662200927734375,
 0.6617655158042908,
 0.6606206893920898,
 0.6597539186477661,
 0.6595001816749573,
 0.659498393535614,
 0.6579702496528625,
 0.657727062702179,
 0.6574667096138,
 0.656964123249054,
 0.6567689776420593,
 0.6556574702262878,
 0.655053436756134,
 0.6549975872039795,
 0.6542130708694458,
 0.6539175510406494,
 0.6536197662353516,
 0.6534443497657776,
 0.6531872153282166,
 0.6529080867767334,
 0.652268648147583,
 0.6522124409675598,
 0.6521428227424622,
 0.652125895023346,
 0.6517173647880554,
 0.6509376764297485,
 0.6509096622467041,
 0.6508827805519104,
 0.6505467891693115,
 0.6492860317230225,
 0.6486479043960571,
 0.6486145853996277,
 0.6485013365745544,
 0.6484130620956421,
 0.6481355428695679,
 0.647615909576416,
 0.6474666595458984,
 0.6465640068054199,
 0.6460104584693909,
 0.6459749937057495,
 0.6459360122680664,
 0.6455346345901489,
 0.6451497077941895,
 0.6451070308685303,
 0.6449517607688904,
 0.6449416875839233,
 0.6448999047279358,
 0.6447224020957947,
 0.6444844603538513,
 0.6444458365440369,
 0.6442970633506775,
 0.6436285972595215,
 0.6432618498802185,
 0.6431841850280762,
 0.6425811052322388,
 0.6417860984802246,
 0.6417686343193054,
 0.6416600346565247,
 0.6415826082229614,
 0.6414430141448975,
 0.6410348415374756,
 0.640630841255188,
 0.6404160261154175,
 0.6401026248931885,
 0.63993239402771,
 0.639882504940033,
 0.6395856142044067,
 0.639421820640564,
 0.6380833983421326,
 0.6379858255386353,
 0.6375442743301392,
 0.637353777885437,
 0.6371648907661438,
 0.6369909048080444,
 0.6369869112968445,
 0.6367626786231995,
 0.6362351179122925,
 0.636012613773346,
 0.6358704566955566,
 0.6356680393218994,
 0.6356462240219116,
 0.6355707049369812,
 0.6354865431785583,
 0.6353418827056885,
 0.6348565816879272,
 0.6345191597938538,
 0.6345074772834778,
 0.6344534754753113,
 0.6339588761329651,
 0.6336132884025574,
 0.6334683895111084,
 0.6332551836967468,
 0.6330251693725586,
 0.6329081058502197,
 0.6327518224716187,
 0.6318038702011108,
 0.6317944526672363,
 0.6317700147628784,
 0.6317121982574463,
 0.6316589713096619,
 0.6316245198249817,
 0.6313509941101074,
 0.6306886076927185,
 0.6305879950523376,
 0.6305497884750366,
 0.6305269002914429,
 0.630096971988678,
 0.6296277046203613,
 0.6295402646064758,
 0.6295005679130554,
 0.6292697787284851,
 0.6290936470031738,
 0.6286728382110596,
 0.6286492943763733,
 0.6285806894302368,
 0.6283563375473022,
 0.6279318332672119,
 0.6277845501899719,
 0.6277395486831665,
 0.6270812749862671,
 0.6268585920333862,
 0.6256161332130432,
 0.6254564523696899,
 0.6253827214241028,
 0.6252259016036987,
 0.6251668334007263,
 0.6251053810119629,
 0.6239646077156067,
 0.6235501170158386,
 0.623266875743866,
 0.6227879524230957,
 0.6223676800727844,
 0.6216578483581543,
 0.620872974395752,
 0.6207531690597534,
 0.6204898357391357,
 0.6199707388877869,
 0.619939923286438,
 0.619571328163147,
 0.6194667220115662,
 0.6189941167831421,
 0.6188552975654602,
 0.6185175776481628,
 0.6182923316955566,
 0.6180881261825562,
 0.6179910898208618,
 0.6179143786430359,
 0.6177471876144409,
 0.6172986030578613,
 0.6171382069587708,
 0.6170961856842041,
 0.6170250177383423,
 0.6169363260269165,
 0.61668461561203,
 0.6163849234580994,
 0.616259753704071,
 0.6160246133804321,
 0.6159703731536865,
 0.6155744194984436,
 0.6147223114967346,
 0.6145497560501099,
 0.6140955686569214,
 0.6140169501304626,
 0.6139309406280518,
 0.6138129830360413,
 0.613811731338501,
 0.6133521199226379,
 0.6131906509399414,
 0.6131694316864014,
 0.6130494475364685,
 0.612968921661377,
 0.6129478812217712,
 0.6116432547569275,
 0.6116387844085693,
 0.6113128662109375,
 0.6108748316764832,
 0.6107308864593506,
 0.6106475591659546,
 0.610262393951416,
 0.6101744174957275,
 0.6101717948913574,
 0.6101569533348083,
 0.6100338697433472,
 0.6099213361740112,
 0.6097745299339294,
 0.6095500588417053,
 0.6094909310340881,
 0.6094450354576111,
 0.6090566515922546,
 0.6087564826011658,
 0.6087318658828735,
 0.6083419919013977,
 0.6081241369247437,
 0.607871413230896,
 0.6077539920806885,
 0.6068829894065857,
 0.6065410375595093,
 0.6062430739402771,
 0.6058253645896912,
 0.6055247783660889,
 0.6053788065910339,
 0.6053472757339478,
 0.6052625775337219,
 0.6048938632011414,
 0.604708194732666,
 0.6043591499328613,
 0.6043391823768616,
 0.6043362021446228,
 0.6042796969413757,
 0.6038782596588135,
 0.6036362051963806,
 0.6035915613174438,
 0.6033117175102234,
 0.6032079458236694,
 0.6029709577560425,
 0.6028017997741699,
 0.6026512384414673,
 0.6024478673934937,
 0.602216899394989,
 0.6018061637878418,
 0.6017376184463501,
 0.6012206673622131,
 0.6007962226867676,
 0.6006240248680115,
 0.6005715131759644,
 0.6003936529159546,
 0.6003291606903076,
 0.6000486612319946,
 0.5996347665786743,
 0.5994622707366943,
 0.599257230758667,
 0.5991039276123047,
 0.5984320640563965,
 0.5983387231826782,
 0.5982711911201477,
 0.5978343486785889,
 0.5977898240089417,
 0.5977823734283447,
 0.5976536273956299,
 0.5973724722862244,
 0.5973426699638367,
 0.597022533416748,
 0.5968469381332397,
 0.5965945720672607,
 0.5965850353240967,
 0.5964047312736511,
 0.5962276458740234,
 0.5960531830787659,
 0.5958200097084045,
 0.595647394657135,
 0.5950970649719238,
 0.5949172377586365,
 0.5947761535644531,
 0.5942966938018799,
 0.5942276120185852,
 0.5939619541168213,
 0.5939454436302185,
 0.5937827229499817,
 0.5937712788581848,
 0.593761682510376,
 0.5936322212219238,
 0.5930981636047363,
 0.5929948091506958,
 0.592664361000061,
 0.5925729870796204,
 0.5924862623214722,
 0.5924296379089355,
 0.5921010375022888,
 0.591584324836731,
 0.5915680527687073,
 0.5915402173995972,
 0.5914790630340576,
 0.5913110375404358,
 0.5911639928817749,
 0.5910633206367493,
 0.5909237265586853,
 0.5906891226768494,
 0.5906646847724915,
 0.5903602838516235,
 0.5902671217918396,
 0.5898656845092773,
 0.5893831849098206,
 0.5891802310943604,
 0.5889486074447632,
 0.5889463424682617,
 0.5887507200241089,
 0.588686466217041,
 0.5885319113731384,
 0.588383138179779,
 0.5882313847541809,
 0.5881727933883667,
 0.5880797505378723,
 0.5878171324729919,
 0.587637186050415,
 0.5874754786491394,
 0.5874388217926025,
 0.5873831510543823,
 0.5872762203216553,
 0.5871644020080566,
 0.5870528817176819,
 0.5870521664619446,
 0.5866469740867615,
 0.5865029096603394,
 0.5861585140228271,
 0.5860657691955566,
 0.585838794708252,
 0.5858339071273804,
 0.5855178833007812,
 0.5853433609008789,
 0.5853050351142883,
 0.584908664226532,
 0.5847539901733398,
 0.5840827226638794,
 0.5840543508529663,
 0.5838108062744141,
 0.5837247967720032,
 0.5834543704986572,
 0.583451509475708,
 0.5830873250961304,
 0.5830100774765015,
 0.5828806757926941,
 0.5824500322341919,
 0.5822259187698364,
 0.5821603536605835,
 0.5813983678817749,
 0.5812755823135376,
 0.5811508893966675,
 0.5811348557472229,
 0.5810601115226746,
 0.5809980034828186,
 0.5809274315834045,
 0.5805932879447937,
 0.5805398225784302,
 0.5800670385360718,
 0.5799999833106995,
 0.579505443572998,
 0.5792343020439148,
 0.5792225003242493,
 0.5791121125221252,
 0.5790188312530518,
 0.5788261890411377,
 0.5787473320960999,
 0.5786319375038147,
 0.5783180594444275,
 0.578109622001648,
 0.5780721306800842,
 0.5780323147773743,
 0.5780043601989746,
 0.5771296620368958,
 0.5771205425262451,
 0.5770235657691956,
 0.5770174264907837,
 0.5769822597503662,
 0.5766937732696533,
 0.5766468048095703,
 0.5765354037284851,
 0.575879693031311,
 0.5758066773414612,
 0.5757375955581665,
 0.5753865838050842,
 0.5751538276672363,
 0.5751133561134338,
 0.5749435424804688,
 ...]

In [9]:
percentile99 = np.percentile(df['residual'], 99)

In [10]:
sns.distplot(df['residual'].sample(n=10000),  kde=False, rug=True)
plt.show()



In [11]:
len(df)


Out[11]:
42798

In [12]:
plt.close()

In [13]:
residuals = df['residual'].values.tolist()

In [14]:
np.max(residuals) - np.min(residuals)


Out[14]:
60.033355519175529

In [15]:
def RadiantDireDifference(row, column):
    faction = ['radiant', 'dire']
    poss = ['pos1', 'pos2', 'pos3', 'pos4', 'pos5']
    radiantSum = 0
    direSum = 0
    for pos in poss:
        radiantSum += row['radiant_'+ pos + '_' + column]
        direSum += row['dire_'+ pos + '_' + column]
    return radiantSum - direSum

def reject_outliers(data, m=1.5):
    return abs(data - np.mean(data)) < m * np.std(data)


def heroKDAdeficit(row):
    faction = ['radiant', 'dire']
    poss = ['pos1', 'pos2', 'pos3', 'pos4', 'pos5']
    largestAbsoluteSoFar = -1
    returnVal = -1
    for pos in poss:
        for fac in faction:
            value = row[fac + '_' + pos + '_kills'] + \
                        row[fac + '_' + pos + '_assists'] - \
                        row[fac + '_' + pos + '_deaths']
            if abs(value) >= largestAbsoluteSoFar:
                largestAbsoluteSoFar = abs(value)
                returnVal = value
    return returnVal

def heroKDdeficit(row):
    faction = ['radiant', 'dire']
    poss = ['pos1', 'pos2', 'pos3', 'pos4', 'pos5']
    largestAbsoluteSoFar = -1
    returnVal = -1
    for pos in poss:
        for fac in faction:
            value = row[fac + '_' + pos + '_kills'] - \
                        row[fac + '_' + pos + '_deaths']
            if abs(value) >= largestAbsoluteSoFar:
                largestAbsoluteSoFar = abs(value)
                returnVal = value
    return returnVal

def heroKD(row):
    faction = ['radiant', 'dire']
    poss = ['pos1', 'pos2', 'pos3', 'pos4', 'pos5']
    largestAbsoluteSoFar = -1
    returnVal = -1
    for pos in poss:
        for fac in faction:
            value = row[fac + '_' + pos + '_kills'] - \
                        row[fac + '_' + pos + '_deaths']
            if abs(value) >= largestAbsoluteSoFar:
                largestAbsoluteSoFar = abs(value)
                returnVal = int(pos[3:4])
    return returnVal

def heroGoldSpent(row):
    faction = ['radiant', 'dire']
    poss = ['pos1', 'pos2', 'pos3', 'pos4', 'pos5']
    largestAbsoluteSoFar = -1
    returnVal = -1
    for pos in poss:
        for fac in faction:
            value = row[fac + '_' + pos + '_gold_spent']
            if abs(value) >= largestAbsoluteSoFar:
                largestAbsoluteSoFar = abs(value)
                returnVal = int(pos[3:4])
    return returnVal

In [16]:
vote_mask = (df['positive_votes'] == 0) & (df['negative_votes'] == 0)
def outlier_mask(data, other_mask = vote_mask, m = 2.):
    d = np.abs(data - np.median(data))
    mdev = np.median(d)
    s = d/mdev if mdev else 0.
    return np.logical_and(s<m, other_mask)

In [17]:
kill_difference = np.abs(df.apply(RadiantDireDifference, args=('kills',), axis=1))
kill_difference_mask = outlier_mask(kill_difference)
sns.regplot(kill_difference[kill_difference_mask], np.log(df['residual'][kill_difference_mask]))
ax = plt.gca()
ax.set_xlabel("Kill difference")
ax.set_ylabel("Logarithm of Residual")
plt.savefig('graphs/Kill_Difference.png', bbox_inches='tight')
plt.show()



In [18]:
gold_difference = np.abs(df.apply(RadiantDireDifference, args=('gpm',), axis=1))
gold_difference_mask = outlier_mask(gold_difference)
sns.regplot(np.abs(gold_difference)[gold_difference_mask], df['residual'][gold_difference_mask])
ax = plt.gca()
ax.set_xlabel("GPM difference")
ax.set_ylabel("Raw Residual")
plt.savefig('graphs/GPM_Difference.png', bbox_inches='tight')
plt.show()



In [19]:
kda = df.apply(heroKDAdeficit, axis=1)
kda_mask = outlier_mask(kda)
sns.regplot(kda[kda_mask], df['residual'][kda_mask])
ax = plt.gca()
ax.set_xlabel("K + A - D")
ax.set_ylabel("Raw Residual")
plt.savefig('graphs/MaxKAD_Difference.png', bbox_inches='tight')
plt.show()



In [20]:
kd = df.apply(heroKDdeficit, axis=1)
kd_mask = outlier_mask(kd)
sns.regplot(np.abs(kd)[kd_mask], df['residual'][kd_mask])
ax = plt.gca()
ax.set_xlabel("Absolute(K - D)")
ax.set_ylabel("Raw Residual")
plt.savefig('graphs/absolutek-d_Difference.png', bbox_inches='tight')
plt.show()



In [21]:
direpos2kills = df['dire_pos2_kills']
direpos2kills_mask = outlier_mask(direpos2kills)
sns.regplot(df['dire_pos2_kills'][direpos2kills_mask], df['residual'][direpos2kills_mask])
ax = plt.gca()
ax.set_xlabel("Dire Pos 2 Kills")
ax.set_ylabel("Raw Residual")
plt.savefig('graphs/direpos2kills.png', bbox_inches='tight')
plt.show()



In [22]:
radiantpos1kills = df['radiant_pos1_kills']
radiantpos1kills_mask = outlier_mask(radiantpos1kills)
sns.regplot(df['radiant_pos1_kills'][radiantpos1kills_mask], df['residual'][radiantpos1kills_mask])
ax = plt.gca()
ax.set_xlabel("Radiant Pos 1 Kills")
ax.set_ylabel("Raw Residual")
plt.savefig('graphs/radiantpos1kills.png', bbox_inches='tight')
plt.show()



In [23]:
radiantpos1kd = df['radiant_pos1_kills'] - df['radiant_pos1_deaths']
radiantpos1kd_mask = outlier_mask(radiantpos1kd)
sns.regplot(radiantpos1kd[radiantpos1kd_mask], df['residual'][radiantpos1kd_mask])
ax = plt.gca()
ax.set_xlabel("Radiant Pos 1 Kills - Deaths")
ax.set_ylabel("Raw Residual")
plt.savefig('graphs/radiantpos1killsminusdeaths.png', bbox_inches='tight')
plt.show()



In [24]:
radiantpos5kd = df['radiant_pos5_kills'] - df['radiant_pos5_deaths']
radiantpos5kd_mask = outlier_mask(radiantpos5kd)
sns.regplot(radiantpos5kd[radiantpos5kd_mask], df['residual'][radiantpos5kd_mask])
ax = plt.gca()
ax.set_xlabel("Radiant Pos 5 Kills - Deaths")
ax.set_ylabel("Raw Residual")
plt.savefig('graphs/radiantpos5killsminusdeaths.png', bbox_inches='tight')
plt.show()



In [25]:
duration = df['duration']
duration_mask = outlier_mask(duration)
sns.regplot(df['duration'][duration_mask], df['residual'][duration_mask])
ax = plt.gca()
ax.set_xlabel("Duration")
ax.set_ylabel("Raw Residual")
plt.savefig('graphs/durationresidual.png', bbox_inches='tight')
plt.show()



In [26]:
df[df['residual'] > percentile99]


Out[26]:
match_id positive_votes negative_votes first_blood_time radiant_win duration dire_pos1_ancient_kills dire_pos1_courier_kills dire_pos1_kills dire_pos1_neutral_kills ... dire_pos2_abandons dire_pos3_abandons dire_pos4_abandons dire_pos5_abandons radiant_pos1_abandons radiant_pos2_abandons radiant_pos3_abandons radiant_pos4_abandons radiant_pos5_abandons residual
40056 3215497967 49 2 14 True 2748 43.0 0.0 4 141.0 ... 0 0 0 0 0 0 0 0 0 60.093830
3222 3215097220 15 7 15 True 2470 23.0 0.0 7 108.0 ... 0 0 0 0 0 0 0 0 0 22.980583
42706 3215312675 9 4 0 False 2687 0.0 0.0 10 9.0 ... 0 0 0 0 0 0 0 0 0 14.018461
14505 3215162338 0 0 135 False 3981 18.0 21.0 27 70.0 ... 0 0 0 0 0 0 0 0 0 12.276542
8310 3215181105 0 0 0 True 0 NaN NaN 0 NaN ... 0 0 0 0 0 0 0 1 0 10.960655
6186 3215377994 0 0 72 False 2924 NaN NaN 28 NaN ... 0 0 0 0 0 0 0 0 0 10.190336
10574 3215271631 7 1 3 False 1846 4.0 0.0 5 36.0 ... 0 0 0 0 0 0 0 0 0 9.797115
28970 3215289100 0 0 0 False 0 NaN NaN 0 NaN ... 0 0 1 0 0 0 0 0 0 8.665862
32230 3215279863 0 0 0 False 0 NaN NaN 0 NaN ... 0 0 1 0 0 0 0 0 0 8.291083
1609 3215253055 5 0 182 True 1949 0.0 0.0 4 10.0 ... 0 0 0 0 0 0 0 0 0 6.753566
41758 3215448302 0 0 208 False 7375 4.0 1.0 21 61.0 ... 0 0 0 0 0 0 0 0 0 5.982767
10214 3215416370 0 0 0 False 0 NaN NaN 0 NaN ... 0 0 0 0 0 0 0 1 0 5.009284
42735 3215261563 0 0 0 True 0 NaN NaN 0 NaN ... 1 0 0 0 0 0 0 0 0 4.787800
9080 3215294948 4 1 94 True 1804 0.0 0.0 4 19.0 ... 0 0 0 0 0 0 0 0 0 4.737586
8192 3215155134 0 0 0 True 780 0.0 0.0 0 0.0 ... 0 0 0 0 0 0 0 0 0 4.151326
12556 3215350255 0 0 0 True 0 NaN NaN 0 NaN ... 0 0 1 0 0 0 0 0 0 3.858672
15967 3215341221 0 0 0 False 932 NaN NaN 34 NaN ... 0 0 0 0 0 0 0 0 0 3.720057
21049 3215306187 0 0 0 True 0 NaN NaN 0 NaN ... 0 1 0 0 0 0 0 0 0 3.653452
5501 3215302636 0 0 0 False 0 NaN NaN 0 NaN ... 0 0 0 0 0 0 0 1 0 3.612013
28073 3215316820 0 0 0 False 0 NaN NaN 0 NaN ... 0 0 0 0 0 0 0 1 0 3.557962
37873 3215190109 0 0 324 False 4877 3.0 0.0 6 24.0 ... 0 0 0 0 0 1 0 0 0 3.524932
19538 3215355847 0 0 0 False 0 NaN NaN 0 NaN ... 0 0 0 0 0 0 0 1 0 3.455053
18809 3215130854 0 0 9 True 2760 0.0 0.0 3 0.0 ... 0 0 0 0 0 0 0 0 0 3.430158
37087 3215259897 0 0 0 True 1394 3.0 0.0 1 3.0 ... 0 0 0 0 0 0 0 0 0 2.926127
16205 3215394671 0 0 0 True 0 NaN NaN 0 NaN ... 0 0 1 0 0 0 0 0 0 2.813246
37604 3215152225 0 0 0 True 2641 NaN NaN 10 NaN ... 0 0 0 0 0 0 0 0 0 2.767648
37101 3215274711 0 0 0 True 858 NaN NaN 0 NaN ... 0 0 0 0 0 0 0 0 0 2.758488
24635 3215366609 0 0 0 False 0 NaN NaN 0 NaN ... 0 0 0 0 0 0 0 1 0 2.543581
1345 3215449992 0 0 104 False 5045 60.0 0.0 18 317.0 ... 0 0 0 0 0 0 0 0 0 2.515880
26691 3215376405 0 0 0 True 0 NaN NaN 0 NaN ... 0 0 1 0 0 0 0 0 0 2.483158
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
20347 3215337531 0 0 62 False 3373 NaN NaN 1 NaN ... 0 0 0 0 0 1 0 1 0 0.769491
15810 3215102476 0 0 137 False 3312 45.0 0.0 8 350.0 ... 0 0 0 0 0 0 0 0 0 0.767143
27774 3215253869 0 0 292 False 5234 NaN NaN 7 NaN ... 0 0 0 0 0 0 0 0 0 0.766957
27894 3215422166 0 0 118 False 2817 47.0 1.0 10 155.0 ... 0 0 0 0 0 0 0 0 0 0.765579
12285 3215263577 0 0 0 False 1143 NaN NaN 35 NaN ... 0 0 0 0 0 0 0 0 0 0.765576
20308 3215285461 0 0 61 False 2682 20.0 0.0 10 74.0 ... 0 0 0 0 0 0 0 0 0 0.764146
8590 3215114749 0 0 0 False 746 NaN NaN 3 NaN ... 0 0 0 0 1 0 1 1 1 0.763206
14222 3215096854 0 0 93 True 2936 21.0 0.0 5 208.0 ... 0 0 0 0 0 0 0 0 0 0.762437
8943 3215446534 0 0 218 True 1714 0.0 0.0 1 25.0 ... 0 0 0 0 0 0 0 0 0 0.761831
21170 3215119491 0 0 169 True 4051 75.0 0.0 12 354.0 ... 0 0 0 0 0 0 0 0 0 0.761589
9693 3215205453 0 0 140 False 2083 NaN NaN 3 NaN ... 0 1 0 1 0 0 0 0 0 0.759363
10663 3215135325 0 0 0 True 3645 23.0 0.0 4 143.0 ... 0 0 0 0 0 0 0 0 1 0.759099
11652 3215415463 0 0 191 False 2846 31.0 0.0 8 175.0 ... 0 0 0 0 0 0 0 0 0 0.758527
30828 3215322189 0 0 93 True 2680 0.0 0.0 3 12.0 ... 0 1 0 0 0 0 0 0 0 0.758093
15394 3215345510 0 0 307 True 4429 NaN NaN 10 NaN ... 0 0 0 1 0 0 0 1 0 0.757711
7846 3215138506 0 0 160 False 1780 NaN NaN 4 NaN ... 1 1 1 0 0 0 0 0 0 0.757519
37444 3215492929 0 0 73 False 3048 20.0 1.0 8 115.0 ... 0 1 0 0 0 0 0 0 0 0.757248
19730 3215267165 0 0 2 True 2125 NaN NaN 6 NaN ... 0 0 0 0 1 1 1 0 0 0.756915
32004 3215123467 0 0 224 False 3050 0.0 0.0 2 12.0 ... 0 0 0 0 0 0 0 0 0 0.756316
288 3215358156 0 0 163 True 4185 24.0 0.0 7 55.0 ... 0 0 0 0 0 0 0 0 0 0.754428
36615 3215094142 0 0 203 True 4973 NaN NaN 4 NaN ... 0 0 0 0 0 0 0 0 0 0.754374
11493 3215328883 0 0 3 True 3377 37.0 0.0 12 179.0 ... 0 0 0 0 0 0 0 0 0 0.753980
39744 3215250045 0 0 184 True 4439 44.0 0.0 31 132.0 ... 0 0 0 0 0 0 0 0 0 0.753625
9634 3215410384 0 0 0 True 344 NaN NaN 0 NaN ... 0 0 0 0 0 0 0 0 0 0.753601
17709 3215168749 0 0 0 True 413 0.0 0.0 0 0.0 ... 0 0 1 0 0 0 0 0 0 0.753267
27443 3215343292 0 0 86 True 3411 NaN NaN 7 NaN ... 0 0 0 1 0 0 0 1 0 0.753174
39465 3215107300 0 0 108 True 2965 8.0 0.0 9 88.0 ... 0 0 0 0 0 0 0 0 0 0.752881
29673 3215450701 0 0 90 False 2907 45.0 1.0 8 197.0 ... 0 0 0 0 0 0 0 0 0 0.752879
22278 3215239658 0 0 126 False 3992 NaN NaN 10 NaN ... 0 0 1 0 0 0 0 0 0 0.752695
32637 3215427391 0 0 169 True 4418 NaN NaN 6 NaN ... 0 1 0 0 0 0 0 0 0 0.751674

428 rows × 247 columns


In [27]:
df[df['residual'] < percentile99].sample(n=10, random_state=1)


Out[27]:
match_id positive_votes negative_votes first_blood_time radiant_win duration dire_pos1_ancient_kills dire_pos1_courier_kills dire_pos1_kills dire_pos1_neutral_kills ... dire_pos2_abandons dire_pos3_abandons dire_pos4_abandons dire_pos5_abandons radiant_pos1_abandons radiant_pos2_abandons radiant_pos3_abandons radiant_pos4_abandons radiant_pos5_abandons residual
15328 3215105241 0 0 112 False 2777 NaN NaN 24 NaN ... 0 0 0 0 0 0 0 0 0 0.174453
26872 3215376151 0 0 114 True 2399 NaN NaN 7 NaN ... 0 0 0 0 0 0 0 0 0 0.108168
9558 3215415736 0 0 169 False 3833 NaN NaN 20 NaN ... 0 0 1 0 0 0 0 0 0 0.539407
38056 3215109078 0 0 191 False 3583 1.0 0.0 15 51.0 ... 0 0 0 0 0 0 0 0 0 0.522020
28038 3215171445 0 0 106 True 3963 NaN NaN 14 NaN ... 0 0 0 0 0 0 0 0 0 0.219839
19879 3215172311 1 0 60 True 4322 NaN NaN 4 NaN ... 0 0 0 0 0 0 0 0 0 0.478943
23736 3215186932 0 0 402 True 2640 NaN NaN 10 NaN ... 0 0 0 0 0 0 0 0 0 0.108369
24764 3215409668 0 0 179 False 2451 NaN NaN 9 NaN ... 0 0 0 0 0 0 1 0 0 0.138435
3126 3215384337 0 0 146 True 3289 NaN NaN 8 NaN ... 0 0 0 0 0 0 1 0 0 0.514544
5952 3215141313 0 0 126 False 2572 3.0 0.0 11 116.0 ... 0 0 0 0 0 0 0 0 0 0.341837

10 rows × 247 columns


In [28]:
biggestAnom = np.loadtxt('data/anomalies.csv', delimiter = ',')
theirOutput = np.loadtxt('data/output.csv', delimiter = ",")

In [29]:
biggestAnom = np.column_stack([df['match_id'].values, biggestAnom])
theirOutput = np.column_stack([df['match_id'].values, theirOutput])

In [30]:
dictToEndAllDicts = {'match_id': [], 'column': [], 'actual': [], 'predicted': []}

In [31]:
def whichIndex(row1, row2):
    largestError = -1
    index = -1
    for i in range(1, len(row1)):
        if abs(row1[i] - row2[i]) > largestError:
            largestError = abs(row1[i] - row2[i])
            index = i
    return index
columns = list(df.columns)
columns.remove('match_id')
mask = np.where(df['residual'] >= percentile99)[0]

biggestAnom = biggestAnom[mask, :]
theirOutput = theirOutput[mask, :]

for i in range(len(biggestAnom)):
    index = whichIndex(biggestAnom[i, :], theirOutput[i, :])
    dictToEndAllDicts['match_id'].append(int(biggestAnom[i, 0]))
    dictToEndAllDicts['column'].append(columns[index])
    dictToEndAllDicts['actual'].append(biggestAnom[i, index])
    dictToEndAllDicts['predicted'].append(theirOutput[i, index])

In [32]:
tableOfAnomalies = pd.DataFrame.from_dict(dictToEndAllDicts)

In [33]:
tableOfAnomalies.to_csv('data/tableOfAnomaliesAndTheirCause.csv', index = False)

In [34]:
tableOfAnomalies


Out[34]:
actual column match_id predicted
0 49.000000 negative_votes 3215497967 1.000000
1 15.000000 negative_votes 3215097220 1.000000
2 9.000000 negative_votes 3215312675 1.000000
3 0.997236 dire_pos5_courier_kills 3215162338 0.254108
4 0.967832 radiant_pos3_gpm 3215181105 0.215278
5 0.986188 dire_pos5_kda 3215377994 0.675354
6 7.000000 negative_votes 3215271631 1.000000
7 0.915403 dire_pos4_gpm 3215289100 0.162801
8 0.915403 dire_pos4_gpm 3215279863 0.155206
9 5.000000 negative_votes 3215253055 1.000000
10 0.977942 radiant_pos1_sentry_uses 3215448302 0.730816
11 0.023621 dire_pos3_gpm 3215416370 0.595808
12 0.243854 radiant_pos3_gpm 3215261563 0.536931
13 4.000000 negative_votes 3215294948 1.000000
14 0.996014 radiant_pos5_kills 3215155134 0.729708
15 0.172037 radiant_pos4_gpm 3215350255 0.478153
16 0.765351 radiant_pos1_kpm 3215341221 0.473024
17 0.796921 radiant_pos2_gpm 3215306187 0.594073
18 0.995038 dire_pos4_gpm 3215302636 0.616405
19 0.995249 dire_pos5_gpm 3215316820 0.661066
20 0.991397 dire_pos3_sentry_uses 3215190109 0.725191
21 0.994378 dire_pos2_gpm 3215355847 0.634865
22 0.992697 radiant_pos3_kills 3215130854 0.734208
23 0.995991 radiant_pos1_kills 3215259897 0.731750
24 0.420948 dire_pos1_totalgold 3215394671 0.155802
25 0.853668 dire_pos4_deaths 3215152225 0.677466
26 0.970540 radiant_pos2_kpm 3215274711 0.548053
27 0.417960 dire_pos3_xpm 3215366609 0.140712
28 0.837322 radiant_pos2_courier_kills 3215449992 0.687711
29 0.417725 radiant_pos2_xpm 3215376405 0.142942
... ... ... ... ...
398 0.508021 dire_pos1_totalxp 3215337531 0.461330
399 0.987752 radiant_pos5_hero_heal 3215102476 0.764587
400 0.602995 dire_pos4_lasthits 3215253869 0.546018
401 0.068665 dire_pos5_courier_kills 3215422166 0.011190
402 0.602511 radiant_pos3_deaths 3215263577 0.545937
403 0.647322 radiant_pos2_courier_kills 3215285461 0.583701
404 0.977324 residual 3215114749 0.947976
405 0.728203 dire_pos5_courier_kills 3215096854 0.634661
406 0.242480 radiant_pos3_sentry_uses 3215446534 0.059556
407 0.586169 dire_pos3_observer_uses 3215119491 0.541193
408 -0.022676 residual 3215205453 -0.055943
409 0.568367 radiant_pos3_ancient_kills 3215135325 0.522728
410 0.117480 radiant_pos3_sentry_uses 3215415463 0.008837
411 0.668367 radiant_pos3_tower_kills 3215322189 0.594004
412 0.976364 radiant_pos1_abandons 3215345510 0.948507
413 -0.022676 residual 3215138506 -0.061634
414 0.653263 dire_pos1_abandons 3215492929 0.580061
415 0.612837 radiant_pos5_totalgold 3215267165 0.538579
416 0.754499 dire_pos2_observer_uses 3215123467 0.658702
417 0.032951 dire_pos5_courier_kills 3215358156 0.000771
418 0.786506 radiant_pos5_hero_dmg 3215094142 0.659190
419 0.711157 radiant_pos1_courier_kills 3215328883 0.623964
420 0.032951 dire_pos5_courier_kills 3215250045 -0.003149
421 0.336952 radiant_pos2_kpm 3215410384 0.213571
422 0.613161 dire_pos1_kda 3215168749 0.543565
423 0.976364 radiant_pos1_abandons 3215343292 0.948496
424 0.641713 radiant_pos1_ancient_kills 3215107300 0.576132
425 0.117480 radiant_pos3_sentry_uses 3215450701 0.033871
426 0.863760 dire_pos1_xpm 3215239658 0.700287
427 0.523910 radiant_pos1_totalgold 3215427391 0.481955

428 rows × 4 columns


In [ ]: