notebook.community

Edit and run



In [208]:

    
#!/usr/bin/python

import ast
import hashlib
import collections
import pandas as pd
import numpy as np
import re


def createDictTGE(input, replayId):
    '''
    Converts raw heroprotocol outputs --trackerevents, --gameevents to a <list> of <dict>.

    The raw file is a run of <dict> literals written one after another with no separator between them.  The
    lines are stitched into one Python list literal by appending a comma to every line that is directly
    followed by a line starting with '{' (the start of the next event).
    @param <str> input: path to the raw heroprotocol --trackerevents or --gameevents output
    @param replayId: identifier stored under the 'replayId' key of every event
    @return: <list> of event <dict>, each tagged with 'replayId'; an empty file yields an empty <list>
    '''
    with open(input, 'r') as f:
        lines = f.readlines()

    pieces = []
    lastIndex = len(lines) - 1
    seenContent = False
    for index, line in enumerate(lines):
        piece = line.rstrip()
        if piece:
            seenContent = True
        # A following line that opens a new <dict> means an event has just ended.  The first line is treated
        # like any other, so a single-line first event is separated from the second one; leading blank lines
        # get no comma because nothing has been emitted yet.
        if seenContent and index < lastIndex and lines[index + 1].startswith('{'):
            piece += ','
        pieces.append(piece)

    # literal_eval only accepts Python literals, so the file contents are never executed
    dictEvents = ast.literal_eval('[' + ''.join(pieces) + ']')

    for event in dictEvents:
        event['replayId'] = replayId

    return dictEvents


def prepDictTE(listTE, replayId):
    '''
    Flattens tracker events into a parent table and three sub tables, each a <dict> of <list> ready for Pandas
    <DataFrame> conversion.  Blank values are populated with np.nan.
    @param <list> listTE: <list> of <dict> of --trackerevents, the output of createDictTGE
    @param replayId: identifier written to the 'replayId' column of every table
    @return: <dict> parentTE, <dict> m_intData, <dict> m_stringData, <dict> m_fixedData, and summary, the last
             event <dict> containing 'm_instanceList' (None when no event has that key)
    '''
    # text types whose 'Hero' prefix is stripped below: (str, unicode) on Python 2, (str, str) on Python 3
    textTypes = (type(''), type(u''))

    # initialize keys of parent table and remember the last event that carries 'm_instanceList'
    parentTE = {}
    # stays None instead of being left unbound when no event has 'm_instanceList'
    summary = None
    for d in listTE:
        for k in d:
            if k not in parentTE:
                parentTE[k] = []
        if 'm_instanceList' in d:
            summary = d

    # populate parent table; after each event every column is padded with np.nan up to the number of events
    # seen so far, which keeps all columns the same length
    for listLength, d in enumerate(listTE, 1):
        for k in d:
            parentTE[k].append(d[k])
        for column in parentTE.values():
            if len(column) < listLength:
                column.append(np.nan)

    parentTE['replayId'] = [replayId] * len(listTE)

    # clean parentTE
    cleanParentTE = ['m_instanceList', 'm_items', 'm_count', 'm_killerUnitTagIndex', 'm_killerUnitTagRecycle',
                     'm_slotId', 'm_upgradeTypeName', 'm_upkeepPlayerId', 'm_type']
    for k in cleanParentTE:
        parentTE.pop(k, None)

    # clean the values of parentTE['m_intData', 'm_stringData', 'm_fixedData']
    parentClean = ['m_intData', 'm_stringData', 'm_fixedData']
    for name in parentClean:
        cleanTESubDict(parentTE[name])

    # build the three sub tables: one column per key found in the events plus the shared bookkeeping columns
    m_intData, m_stringData, m_fixedData = {}, {}, {}
    # order of the sub tables must match parentClean above
    for name, subDict in zip(parentClean, (m_intData, m_stringData, m_fixedData)):
        for key in initializeTESubKeys(parentTE[name]):
            subDict[key] = []
        for key in ('replayId', '_gameloop', '_bits', '_eventid'):
            subDict[key] = []
        populateTESubDicts(parentTE, subDict, name)

    # remove 'GameTime', 'PreviousGameTime', 'Time' from m_fixedData
    for k in ('GameTime', 'PreviousGameTime', 'Time'):
        m_fixedData.pop(k, None)

    # edit m_stringData "Hero" key to remove 'Hero' prefix from values; non-text values (np.nan) are kept
    m_stringData['Hero'] = [h.replace('Hero', '') if isinstance(h, textTypes) else h
                            for h in m_stringData['Hero']]

    # standardize PlayerID to m_userId reporting in m_intData, before{range(1,11)}, after{range(0,10)}
    m_intData['m_userId'] = [x - 1 for x in m_intData.pop('PlayerID')]
    # adjust m_intData['KillingPlayer'] from range(1,11) to range(0,10) for consistency on userId
    m_intData['KillingPlayer'] = [x - 1 for x in m_intData['KillingPlayer']]
    # adjust m_intData['Team'] from range(1,3) to range(0,2)
    m_intData['Team'] = [x - 1 for x in m_intData['Team']]

    # remove m_playerId, m_intData, m_stringData, m_fixedData from parentTE
    for k in ('m_playerId', 'm_intData', 'm_stringData', 'm_fixedData'):
        parentTE.pop(k, None)

    return parentTE, m_intData, m_stringData, m_fixedData, summary


def cleanTESubDict(subDict):
    '''
    prepDictTE() helper function to format parentTE['m_intData', 'm_stringData', 'm_fixedData'].

    Every <list> element of subDict is rewritten in place from [{'m_key': k, 'm_value': v}, ...] to
    [{k: v}, ...].  Elements that are not a <list> are skipped, and a <list> holding any item without both
    'm_key' and 'm_value' is left untouched.
    @param <list> subDict: one column of the parent tracker events <dict>; its <list> elements are modified
                           in place
    '''
    for item in subDict:
        if not isinstance(item, list):
            continue
        try:
            converted = [{d['m_key']: d['m_value']} for d in item]
        except (KeyError, TypeError):
            # an item is missing a key or is not a <dict>: keep the whole <list> as it is
            continue
        # slice assignment keeps the same <list> object, so the parent table sees the change
        item[:] = converted


def populateTESubDicts(parentTE, subDict, dictName):
    '''
    prepDictTE() helper function that fills one sub <dict> from the matching column of the parent table.

    Each <list> found in parentTE[dictName] is a <list> of one-key <dict>s,
    e.g. [{'PlayerID': 8}, {'KillingPlayer': 1}, {'KillingPlayer': 2}].  A <list> without repeated keys becomes
    one row.  A <list> with repeated keys becomes one row per item after the first, each row pairing that item
    with the first item.  Values that are not a <list> are ignored.
    @param <dict> parentTE: parent <dict> of tracker events
    @param: <dict> subDict: <dict> to populate
    @param: <str> dictName: <str> corresponding to <dict> name
    '''
    for position, record in enumerate(parentTE[dictName]):
        if not isinstance(record, list):
            continue
        hasRepeats, _ = isDuplicateKeys(record)
        if hasRepeats:
            # several items share a key: emit one row per trailing item, always led by the first item
            leader = record[0]
            for follower in record[1:]:
                populateFromEntry(parentTE, subDict, [leader, follower], position)
        else:
            # every key occurs once: the whole record is a single row
            populateFromEntry(parentTE, subDict, record, position)


def initializeTESubKeys(subDict):
    '''
    prepDictTE() helper function that collects every key used by the <dict>s nested inside subDict.
    @param subDict: iterable whose <list> elements hold <dict>s; elements that are not a <list> are skipped
    @return: <list> of the distinct keys, in order of first appearance
    '''
    alreadySeen = set()
    orderedKeys = []
    for element in subDict:
        if not isinstance(element, list):
            continue
        for mapping in element:
            for name in mapping.keys():
                if name in alreadySeen:
                    continue
                alreadySeen.add(name)
                orderedKeys.append(name)

    return orderedKeys


def isDuplicateKeys(entry):
    '''
    Reports whether any key occurs more than once across the <dict>s of entry; e.g. multiple copies of
    'KillingPlayer' associated with one 'PlayerID'.  The caller uses this to split such an entry into several
    rows so that all keys in a <dict> keep <list> values of equal length.
    @param <list> entry: a <list> of <dict>
    @return <bool> isDuplicates: True or False
    @return <list> duplicateKeys: every occurrence of a key that appears more than once, in encounter order
    '''
    allKeys = [name for mapping in entry for name in mapping]
    tally = collections.Counter(allKeys)
    repeated = [name for name in allKeys if tally[name] > 1]

    return bool(repeated), repeated


def populateFromEntry(parentTE, subDict, entry, i):
    '''
    populateTESubDicts() helper function that appends one row to subDict.

    The row copies 'replayId', '_gameloop', '_bits' and '_eventid' from position i of the parent table, takes
    its remaining values from the one-key <dict>s in entry, and receives np.nan in every column whose length
    then differs from the 'replayId' column.
    @param <dict> parentTE: the parent <dict>
    @param <dict> subDict: sub of the parent, modified in place
    @param <list> entry: <list> of <dict> supplying the values of this row
    @param <int> i: position of the originating event in the parent table
    '''
    for sharedColumn in ('replayId', '_gameloop', '_bits', '_eventid'):
        subDict[sharedColumn].append(parentTE[sharedColumn][i])

    for mapping in entry:
        for name, value in mapping.items():
            subDict[name].append(value)

    # columns that received nothing for this row are padded with np.nan
    expected = len(subDict['replayId'])
    for column in subDict.values():
        if len(column) != expected:
            column.append(np.nan)


def createDictInitData(initData, type="text"):
    '''
    Converts raw heroprotocol outputs --initdata to a <dict>.

    In "text" mode every line before the first line starting with '{' is dropped, and the file is rewritten
    in place without those lines.  A file with no such line is left untouched and parsing fails with the
    SyntaxError raised by ast.literal_eval.  In any other mode initData is used as the already parsed <dict>.
    In both modes the keys 'm_cacheHandles', 'm_mapFileName' and 'm_slotDescriptions' are removed from
    ['m_syncLobbyState']['m_gameDescription'].
    @param initData: path to the raw output of heroprotocol --initdata when type is "text", otherwise the
                     parsed <dict> (modified in place)
    @param <str> type: "text" to read and parse a file, anything else to treat initData as a <dict>
    @return: python <dict> of --initdata for replayId information and JSON conversion
    '''
    if type == "text":
        with open(initData, 'r') as f:
            lines = f.readlines()

        start = next((idx for idx, line in enumerate(lines) if line.startswith('{')), len(lines))
        body = lines[start:]
        # only rewrite when leading lines were actually dropped and something is left to write, so that a
        # file without any <dict> is never truncated to nothing
        if body and start > 0:
            with open(initData, 'w') as f:
                f.writelines(body)
        # literal_eval only accepts Python literals, so the file contents are never executed
        dictInitData = ast.literal_eval(''.join(body))
    else:
        dictInitData = initData

    gameDescription = dictInitData['m_syncLobbyState']['m_gameDescription']
    for key in ('m_cacheHandles', 'm_mapFileName', 'm_slotDescriptions'):
        gameDescription.pop(key, None)

    return dictInitData


def createDictAEDH(input, replayId, type="text"):
    '''
    Converts raw heroprotocol outputs --header, --details, --attributeevents to a <dict>.

    In "text" mode the file is parsed with ast.literal_eval and a non-empty 'm_cacheHandles' value is
    replaced by [''].  In any other mode input is used as the already parsed <dict>.  In both modes the
    'replayId' key is set on the <dict> before it is returned.
    @param input: path to the raw output of heroprotocol --header, --details, --attributeevents when type is
                  "text", otherwise the parsed <dict> (modified in place)
    @param replayId: identifier stored under the 'replayId' key
    @param <str> type: "text" to read and parse a file, anything else to treat input as a <dict>
    @return: python <dict> of --header, --details, --attributeevents
    '''
    if type == 'text':
        with open(input, 'r') as f:
            # literal_eval only accepts Python literals, so the file contents are never executed
            dictInput = ast.literal_eval(f.read())
        # blank out non-empty cache handles; outputs without that key are left alone
        if 'm_cacheHandles' in dictInput and dictInput['m_cacheHandles']:
            dictInput['m_cacheHandles'] = ['']
    else:
        dictInput = input

    dictInput['replayId'] = replayId

    return dictInput


def getReplayId(dictInitData):
    '''
    Generates a ReplayId from 'm_randomValue' and the player names.

    The id is the MD5 hex digest of str(m_randomValue) followed by every 'm_name' in
    ['m_syncLobbyState']['m_userInitialData'], concatenated in list order.
    @param <dict> dictInitData: <dict> from output of createDictInitData()
    @return <str>: 32 character hexadecimal replayId
    '''
    lobby = dictInitData['m_syncLobbyState']
    seed = str(lobby['m_gameDescription']['m_randomValue'])
    seed += ''.join(user['m_name'] for user in lobby['m_userInitialData'])

    # hashlib needs bytes: text is encoded as UTF-8, a Python 2 byte <str> is passed through unchanged
    if not isinstance(seed, bytes):
        seed = seed.encode('utf-8')

    return hashlib.md5(seed).hexdigest()


def renameKeys(data):
    '''
    Strips the leading 'm_' or '_' from the keys of data, in place.
    Currently unused, consider using in the future.
    @param <dict> data: <dict> with <str> keys; renamed in place
    @return <dict>: the same <dict> object
    '''
    # iterate over a snapshot of the keys because the <dict> is modified inside the loop
    for key in list(data):
        if key.startswith('m_'):
            data[key[2:]] = data.pop(key)
        elif key.startswith('_'):
            data[key[1:]] = data.pop(key)

    return data


def prepForDf(dictionary):
    '''
    Preps <dict> to proper Pandas <DataFrame> format with values as lists.  Does NOT break out embedded
    dictionaries.  Use function flatten() for that.

    Each value is converted in place according to its type:
      - <bool>, <int>, <str>                      -> [value]
      - <list> holding exactly one <dict>         -> that <dict>
      - <list> of several <dict>s                 -> kept, every <dict> prepped recursively
      - non-empty <list> not starting with <dict> -> [tuple(value)]
      - empty <dict>                              -> [{}]
      - <dict> with one item                      -> <list> holding its single value
      - <dict> with several items                 -> kept, prepped recursively
      - np.nan                                    -> kept
      - anything else that is sized and empty     -> np.nan
    Any other value is left unchanged.
    @param <dict> dictionary: <dict> that requires formatting
    @return <dict> dictionary: the same <dict>, formatted
    '''
    # values are replaced but keys are never added or removed, so iterating the <dict> directly is safe
    for key in dictionary:
        value = dictionary[key]

        # USE CASE 1: convert one <int> or <str> into a list for pandas DataFrame processing
        if isinstance(value, (bool, int, str)):
            dictionary[key] = [value]
        elif isinstance(value, list) and value and isinstance(value[0], dict):
            if len(value) == 1:
                # USE CASE 2: one <list> with one <dict> is replaced by that <dict>
                dictionary[key] = value[0]
            else:
                # USE CASE 3: one <list> with multiple <dict>s, each prepped in place
                for d in value:
                    prepForDf(d)
        elif isinstance(value, list) and value:
            # USE CASE 4: convert one <list> with multiple entries to a list with one tuple entry
            dictionary[key] = [tuple(value)]
        elif isinstance(value, dict):
            if not value:
                # USE CASE 5: convert empty <dict> to a <list> with an empty <dict> inside
                dictionary[key] = [{}]
            elif len(value) == 1:
                # USE CASE 6: <dict> with length = 1 uses the parent key; list() because .values() is a
                # view rather than a <list> on Python 3
                dictionary[key] = list(value.values())
            else:
                # USE CASE 7: <dict> with length > 1 is prepped in place
                prepForDf(value)
        elif value is np.nan:
            # USE CASE 8: already np.nan, nothing to do
            continue
        else:
            try:
                isEmpty = len(value) == 0
            except TypeError:
                # unsized values such as <float> or None are left as they are
                continue
            if isEmpty:
                # USE CASE 8: populate empty field with np.nan
                dictionary[key] = np.nan

    return dictionary


def flatten(d, parent_key='', sep='_'):
    '''
    Flattens embedded <dict> into parent <dict> by combining key names using '_' separator.
    Only mappings are descended into; <list> values are copied over unchanged.
    @param <dict> d: <dict> to be flattened
    @param <str> parent_key: optional prefix for the combined keys
    @param <str> sep: <str> to combine key names
    @return: a new flattened <dict>; d itself is not modified
    '''
    # MutableMapping lives in collections.abc; the alias in collections was removed in Python 3.10
    try:
        from collections.abc import MutableMapping
    except ImportError:  # Python 2
        from collections import MutableMapping

    items = []
    for k, v in d.items():
        # top-level keys are kept as they are, nested keys become '<parent><sep><key>'
        new_key = str(parent_key) + sep + str(k) if parent_key else k
        if isinstance(v, MutableMapping):
            items.extend(flatten(v, new_key, sep=sep).items())
        else:
            items.append((new_key, v))
    return dict(items)


def prepDictInitData(dictInitData, replayId):
    '''
    Splits <dict> of InitData into the following tables: m_gameDescription, m_userInitialData, m_slots
    Reference the schema diagram for key breakout

    m_gameDescription is flattened and prepped with prepForDf().  m_userInitialData and m_slots hold one
    <list> per key, with one value per entry of the source <list>; m_userInitialData gets an 'm_userId' column
    counting up from 0 in list order, while m_slots takes its 'm_userId' column from 'm_workingSetSlotId'.
    Return: Three dictionaries ready for conversion to DataFrames
    '''
    lobby = dictInitData['m_syncLobbyState']

    # game description: replayId plus a shallow copy of every key
    m_gameDescription = {'replayId': replayId}
    m_gameDescription.update(lobby['m_gameDescription'])

    # user data: source is a <list> of <dict>s; columns are taken from the first <dict>
    users = lobby['m_userInitialData']
    m_userInitialData = {}
    for field in users[0]:
        m_userInitialData[field] = []
    m_userInitialData['m_userId'] = []
    m_userInitialData['replayId'] = []
    for slotId, user in enumerate(users):
        for field, value in user.items():
            m_userInitialData[field].append(value)
        m_userInitialData['m_userId'].append(slotId)
        m_userInitialData['replayId'].append(replayId)

    # lobby state: replayId plus a shallow copy of every key, then flattened
    m_lobbyState = {'replayId': replayId}
    m_lobbyState.update(lobby['m_lobbyState'])
    m_lobbyState = flatten(m_lobbyState)

    # slots: each <dict> has same elements, so columns are taken from the first <dict>
    slots = m_lobbyState['m_slots']
    m_slots = {}
    for field in slots[0]:
        m_slots[field] = []
    m_slots['replayId'] = []
    for slot in slots:
        for field, value in slot.items():
            # only the 'm_color' part of the colour preference is kept
            m_slots[field].append(value['m_color'] if field == 'm_colorPref' else value)
        m_slots['replayId'].append(replayId)

    # clean m_slots
    m_slots['m_userId'] = m_slots.pop('m_workingSetSlotId', None)
    for unwanted in ('m_aiBuild', 'm_artifacts', 'm_licenses', 'm_logoIndex', 'm_racePref', 'm_rewards',
                     'm_tandemLeaderUserId', 'm_control', 'm_difficulty'):
        m_slots.pop(unwanted, None)

    # clean m_userInitialData
    for unwanted in ('m_customInterface', 'm_examine', 'm_hero', 'm_mount', 'm_randomSeed',
                     'm_skin', 'm_teamPreference', 'm_racePreference', 'm_testAuto', 'm_testMap',
                     'm_testType', 'm_toonHandle', 'm_clanLogo', 'm_combinedRaceLevels'):
        m_userInitialData.pop(unwanted, None)

    m_gameDescription = prepForDf(flatten(m_gameDescription))

    # clean m_gameDescription
    for unwanted in ('m_defaultDifficulty', 'm_gameCacheName', 'm_gameOptions_m_advancedSharedControl',
                     'm_gameOptions_clientDebugFlags', 'm_defaultAIBuild', 'm_gameOptions_m_battleNet',
                     'm_gameOptions_m_competitive', 'm_gameOptions_m_cooperative', 'm_gameOptions_m_fog',
                     'm_gameOptions_m_lockTeams', 'm_gameOptions_m_noVictoryOrDefeat',
                     'm_gameOptions_m_practice', 'm_gameType', 'm_isCoopMode', 'm_isPremadeFFA',
                     'm_gameOptions_m_randomRaces', 'm_gameOptions_m_teamsTogether',
                     'm_gameOptions_m_userDifficulty', 'm_mapAuthorName', 'm_mapFileSyncChecksum',
                     'm_maxRaces', 'm_modFileSyncChecksum', 'm_gameOptions_m_clientDebugFlags',
                     'm_hasExtensionMod', 'm_maxColors', 'm_maxControls'):
        m_gameDescription.pop(unwanted, None)

    m_slots = flatten(m_slots)

    return m_gameDescription, m_userInitialData, m_slots


def prepDictHeader(dictHeader):
    '''
    Drops 'm_ngdpRootKey' and 'm_signature' from dictHeader (in place), then flattens the header and preps it
    with prepForDf().
    Return: <dict> header ready for DataFrame conversion
    '''
    # clean header
    dictHeader.pop('m_ngdpRootKey', None)
    dictHeader.pop('m_signature', None)

    # flatten header, then prep for df
    flatHeader = flatten(dictHeader)
    return prepForDf(flatHeader)


def prepDictDetails(dictDetails, replayId):
    '''
    Turns dictDetails['m_playerList'] (a <list> of <dict>) into one <dict> of columns ready for DataFrame
    conversion.  The columns are taken from the first player; a 'replayId' column is added,
    'm_workingSetSlotId' is renamed to 'm_userId', and 'm_race', 'm_color' and 'm_toon' are dropped.
    '''
    players = dictDetails['m_playerList']

    # each <dict> has same elements, so the first player defines the columns
    m_playerList = {field: [] for field in players[0]}
    m_playerList['replayId'] = []

    for player in players:
        for field, value in player.items():
            m_playerList[field].append(value)
        m_playerList['replayId'].append(replayId)

    # rename 'm_workingSetSlotId' to 'm_userId' for consistency
    m_playerList['m_userId'] = m_playerList.pop('m_workingSetSlotId')

    # clean m_playerList
    for unwanted in ('m_race', 'm_color', 'm_toon'):
        m_playerList.pop(unwanted, None)

    return m_playerList


def testTEData(parentTE, m_intData, m_stringData, m_fixedData):
    '''
    @param: each <dict> of the --trackerevents output
    @return: print statements if proper formatting exists, otherwise, a <list> of key lengths of incorrect <dict>
    '''
    check = [parentTE, m_intData, m_stringData, m_fixedData]
    names = ['parentTE', 'm_intData', 'm_stringData', 'm_fixedData']

    index = 0
    for d in check:
        count = 0
        for k in d:
            errors = False
            if count == 0:
                prevLength = len(d[k])
            else:
                if len(d[k]) != prevLength:
                    errors = True
                    print 'ERROR: <dict>', names[index], 'keys have differing lengths!'
                    for k in d:
                        print '{0:<25} {1:>5}'.format(k, len(d[k]))
                    break
                else:
                    prevLength = len(d[k])
            count += 1
        if errors is False:
            print 'SUCCESS: <dict>', names[index], 'is ready for DataFrame conversion!'
        index += 1


def prepSummary(summary, m_gameDescription, dfHeader, dictDetails, dfDetails, df_m_slots, df_m_stringData, replayId):
    '''
    Creates <DataFrame> containing summary information from game, one row per player (10 rows).
    @param <dict> summary: the <dict> in dictTE containing 'm_instanceList'
    @param <dict> m_gameDescription: prepped game description; 'm_gameOptions_m_ammId' selects the game type
    @param <DataFrame> dfHeader: header table, read for 'm_dataBuildNum'
    @param <dict> dictDetails: details <dict>, read for 'm_title' (the map name)
    @param <DataFrame> dfDetails: player list table, read for 'm_result', 'm_userId', 'm_name', 'm_hero'
    @param <DataFrame> df_m_slots: slot table, read for 'm_mount', 'm_skin', 'm_hasSilencePenalty'
    @param <DataFrame> df_m_stringData: tracker event string table, read for the 'Tier N Choice' columns
    @param replayId: identifier repeated in the 'ReplayId' column
    @return: <DataFrame> of summary information
    '''
    gameTypeMap = {0: 'Custom',
                   50001: 'Quick Match',
                   50021: 'AI Coop',
                   50051: 'Unranked Draft',
                   50061: 'Hero League',
                   50071: 'Team League'}
    # references init_data 'm_ammId' to determine GameType in conjunction with the map defined above;
    # a <float> here is the np.nan placeholder for a missing id, which is treated as id 0
    ammId = m_gameDescription['m_gameOptions_m_ammId']
    gameTypeId = 0 if isinstance(ammId, float) else ammId[0]
    # ids missing from the map are reported as 'Unknown' instead of raising KeyError
    gameType = gameTypeMap.get(gameTypeId, 'Unknown')

    # use summary['m_instanceList'][0]['m_values'][2] because sometimes zero index has no values
    gameTime = summary['m_instanceList'][0]['m_values'][2][0]['m_time']

    # the slot rows are filtered once and reused for all three slot columns
    playerSlots = df_m_slots.loc[df_m_slots['m_userId'] < 10]
    dictSummary = {'ReplayId': [replayId] * 10,
                   'GameTime': [gameTime] * 10,
                   'GameType': [gameType] * 10,
                   'Map': [dictDetails['m_title']] * 10,
                   'DataBuildNum': [dfHeader['m_dataBuildNum'][0]] * 10,
                   'Win_Loss': ['Win' if x == 1 else 'Loss' for x in dfDetails['m_result']],
                   'UserId': list(dfDetails['m_userId']),
                   'PlayerName': list(dfDetails['m_name']),
                   'Hero': list(dfDetails['m_hero']),
                   'Mount': list(playerSlots['m_mount']),
                   'Skin': list(playerSlots['m_skin']),
                   'Silenced': list(playerSlots['m_hasSilencePenalty'])}

    # talent choices are read from the rows that have a 'Tier 1 Choice' value; notnull() selects them without
    # relying on Python 2 ordering comparisons between <str> and <int>
    if 'Tier 1 Choice' in df_m_stringData:
        talentRows = df_m_stringData.loc[df_m_stringData['Tier 1 Choice'].notnull()]
    else:
        talentRows = None

    for i in range(1, 8):
        key = 'Tier ' + str(i) + ' Choice'
        if talentRows is not None and key in talentRows:
            dictSummary[key] = list(talentRows[key])
        else:
            dictSummary[key] = [''] * 10

    # one column per summary statistic, skipping the empty value slots
    for d in summary['m_instanceList']:
        dictSummary[d['m_name']] = [v[0]['m_value'] for v in d['m_values'] if len(v) != 0]

    return pd.DataFrame(dictSummary)


def generateInitialData(path):
    '''
    Parses <path>init_data.txt and derives the replay id from it; both are needed to build the <DataFrames>.
    @param <string> path: destination of raw output *.txt files; 'init_data.txt' is appended to it as is
    @return <dict> dictInitData: used to initialized the remainder of <dict> and <DataFrame>
    @return <str> replayId: unique identifier
    '''
    initDataFile = path + 'init_data.txt'
    parsedInitData = createDictInitData(initDataFile)

    return parsedInitData, getReplayId(parsedInitData)


def generateSummary(path, dictInitData, replayId):
    '''
    Builds the summary <DataFrame> for one replay from the raw output files found under path.

    Reads 'tracker_events.txt', 'header.txt' and 'details.txt' (each name is appended to path as is), preps
    them together with dictInitData, and hands the results to prepSummary().
    @param <string> path: destination of raw output *.txt files
    @param <dict> dictInitData: <dict> from output of createDictInitData()
    @param <str> replayId: unique identifier of the replay
    @return <DataFrame> dfSummary: summary data
    '''
    dictTE = createDictTGE(path + 'tracker_events.txt', replayId)
    dictHeader = prepDictHeader(createDictAEDH(path + 'header.txt', replayId))
    dictDetails = createDictAEDH(path + 'details.txt', replayId)
    # a distinct local name is required: assigning to prepDictDetails inside this function would make that
    # name local, and the call on the right-hand side would raise UnboundLocalError
    playerList = prepDictDetails(dictDetails, replayId)
    m_gameDescription, m_userInitialData, m_slots = prepDictInitData(dictInitData, replayId)
    parentTE, m_intData, m_stringData, m_fixedData, summary = prepDictTE(dictTE, replayId)

    # only the tables that prepSummary() reads are converted
    dfHeader = pd.DataFrame(dictHeader)
    dfDetails = pd.DataFrame(playerList)
    df_m_slots = pd.DataFrame(m_slots)
    df_m_stringData = pd.DataFrame(m_stringData)

    # prepSummary() takes dictDetails (for the map title) as well as dfDetails
    dfSummary = prepSummary(summary, m_gameDescription, dfHeader, dictDetails, dfDetails, df_m_slots,
                            df_m_stringData, replayId)

    return dfSummary


def gameData(dfSummary):
    '''
    Generates <DataFrame> for Map level data
    @param <DataFrame> dfSummary: returned value of prepSummary()
    @return <DataFrame>: one row, indexed 0, holding the values found at label 0 of the 'ReplayId',
                         'DataBuildNum', 'GameTime', 'GameType' and 'Map' columns of dfSummary
    '''
    gameColumns = ('ReplayId', 'DataBuildNum', 'GameTime', 'GameType', 'Map')

    firstRow = {}
    for column in gameColumns:
        firstRow[column] = dfSummary[column][0]

    return pd.DataFrame(data=firstRow, index=[0])


def playerData(dfSummary):
    '''
    Generates <DataFrame> for player data.
    @param <DataFrame> dfSummary: returned value of prepSummary()
    @return <DataFrame>: the player-level columns of dfSummary, in the order listed below, all rows kept
    '''
    identityColumns = ['ReplayId', 'PlayerName', 'Hero', 'UserId']
    statColumns = ['Takedowns', 'SoloKill', 'Assists', 'Deaths', 'HighestKillStreak', 'HeroDamage',
                   'SiegeDamage', 'StructureDamage', 'MinionDamage', 'CreepDamage', 'SummonDamage',
                   'TimeCCdEnemyHeroes', 'Healing', 'SelfHealing', 'DamageTaken', 'ExperienceContribution',
                   'TownKills', 'TimeSpentDead', 'MercCampCaptures', 'WatchTowerCaptures', 'MetaExperience']
    outcomeColumns = ['Win_Loss']
    talentColumns = ['Tier 1 Choice', 'Tier 2 Choice', 'Tier 3 Choice', 'Tier 4 Choice',
                     'Tier 5 Choice', 'Tier 6 Choice', 'Tier 7 Choice']

    wanted = identityColumns + statColumns + outcomeColumns + talentColumns
    return dfSummary[wanted]


def replayExists(currentFile, replayId):
    '''
    Checks if replayId exists in the 'ReplayId' column of a csv file.
    @param currentFile: csv source passed straight to pandas.read_csv
    @param <str> replayId: replayId
    @return <bool>: True/False
    '''
    knownIds = pd.read_csv(currentFile, usecols=['ReplayId'])['ReplayId']

    return replayId in list(knownIds)


def isMismatch(dict1, dict2):
    '''
    Used to support the unittest script tester.py.  Required to compared <dict> with <list> that include np.nan.
    np.nan == np.nan > False

    Every key of dict1 is compared position by position with the same key of dict2; two np.nan values count
    as equal.  Checking of a key stops at its first difference, which is printed.  A key missing from dict2,
    or <list>s of different length, also count as a mismatch.  Keys present only in dict2 are not checked.
    @param dict1 <dict>: loaded from JSON
    @param dict2 <dict>: generated from function
    @return <bool>: False if no difference was found
    '''
    mismatch = False

    for k in dict1:
        if k not in dict2 or len(dict1[k]) != len(dict2[k]):
            print("\nkey {0!r} is missing from dict 2 or differs in length".format(k))
            mismatch = True
            continue
        for a, b in zip(dict1[k], dict2[k]):
            if isinstance(a, float) and np.isnan(a):
                # np.nan never equals itself, so a NaN matches only another NaN; the isinstance guard
                # keeps np.isnan away from non-numeric values such as <str>
                same = isinstance(b, float) and np.isnan(b)
            else:
                same = a == b
            if not same:
                # single-argument print call produces the same text on Python 2 and Python 3
                print("\ndict 1: {0} dict 2: {1}".format(a, b))
                mismatch = True
                break

    return mismatch


# Script entry point: builds every table for the replay stored under testData/ and leaves the resulting
# <dict>s and <DataFrame>s as module-level names.
if __name__ == '__main__':
    dictInitData = createDictInitData('testData/init_data.txt')
    replayId = getReplayId(dictInitData)

    path = "testData/"

    dictTE = createDictTGE(path + 'tracker_events.txt', replayId)
    # dictGE = createDictTGE(path + 'game_events.txt', replayId)
    dictHeader = prepDictHeader(createDictAEDH(path + 'header.txt', replayId))
    dictDetails = createDictAEDH(path + 'details.txt', replayId)
    # stored under its own name so that the prepDictDetails() function is not replaced by its result
    playerList = prepDictDetails(dictDetails, replayId)
    m_gameDescription, m_userInitialData, m_slots = prepDictInitData(dictInitData, replayId)
    parentTE, m_intData, m_stringData, m_fixedData, summary = prepDictTE(dictTE, replayId)

    dfTE = pd.DataFrame(dictTE)
    # dfGE = pd.DataFrame(dictGE)
    dfHeader = pd.DataFrame(dictHeader)
    dfDetails = pd.DataFrame(playerList)
    df_m_gameDescription = pd.DataFrame(m_gameDescription)
    df_m_userInitialData = pd.DataFrame(m_userInitialData)
    df_m_slots = pd.DataFrame(m_slots)
    dfParentTE = pd.DataFrame(parentTE)
    df_m_intData = pd.DataFrame(m_intData)
    df_m_stringData = pd.DataFrame(m_stringData)
    df_m_fixedData = pd.DataFrame(m_fixedData)
    dfSummary = prepSummary(summary, m_gameDescription, dfHeader, dictDetails, dfDetails, df_m_slots,
                            df_m_stringData, replayId)
    dfGameData = gameData(dfSummary)
    dfPlayerData = playerData(dfSummary)



In [214]:

    
import json

# Write the tracker-events frame to disk as JSON. `pick` is only an alias for
# dfTE (no copy is made), so it is dfTE itself that gets serialised.
pick = dfTE
pick.to_json("testData/dfTE.txt")



In [217]:

    
# Round-trip check: read the JSON file back and rebuild a DataFrame from it.
with open('testData/dfTE.txt', 'r') as f:
    x = json.load(f)

df = pd.DataFrame(x)
# NOTE(review): per the recorded traceback this call raises
# "ValueError: Can only compare identically-labeled DataFrame objects", i.e.
# the reloaded frame's labels do not match dfTE's. Presumably the JSON
# round-trip turned the index labels into strings -- TODO confirm.
cmp(df, dfTE)









    



---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
<ipython-input-217-5f7e71f94687> in <module>()
      3 
      4 df = pd.DataFrame(x)
----> 5 cmp(df, dfTE)

/Users/JoeK/anaconda/lib/python2.7/site-packages/pandas/core/ops.pyc in f(self, other)
   1175     def f(self, other):
   1176         if isinstance(other, pd.DataFrame):  # Another DataFrame
-> 1177             return self._compare_frame(other, func, str_rep)
   1178         elif isinstance(other, ABCSeries):
   1179             return self._combine_series_infer(other, func)

/Users/JoeK/anaconda/lib/python2.7/site-packages/pandas/core/frame.pyc in _compare_frame(self, other, func, str_rep)
   3582     def _compare_frame(self, other, func, str_rep):
   3583         if not self._indexed_same(other):
-> 3584             raise ValueError('Can only compare identically-labeled '
   3585                              'DataFrame objects')
   3586         return self._compare_frame_evaluate(other, func, str_rep)

ValueError: Can only compare identically-labeled DataFrame objects



In [206]:

    
# Python 2 cmp() returns 0 when its arguments compare equal, which is what the
# recorded output shows.
# NOTE(review): listTE is not defined in any visible cell -- presumably it is
# left over from an earlier session; verify before re-running.
cmp(dictTE, listTE)









    Out[206]:





0



In [211]:

    
# Re-run prepDictTE on the tracker events, binding the five results to
# t-prefixed names so the parentTE/m_*/summary values from the main block stay
# untouched.
tparentTE, tm_intData, tm_stringData, tm_fixedData, tsummary = prepDictTE(dictTE, replayId)



In [212]:

    
# Echo the flattened string-data dict for inspection; per the recorded output
# it maps each column name to a list with one entry per event (nan where the
# event has no value for that column).
tm_stringData









    Out[212]:





{'CampType': [nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  'Siege Camp',
  nan,
  nan,
  nan,
  nan,
  'Siege Camp',
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  'Siege Camp',
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  'Siege Camp',
  nan,
  nan,
  nan,
  nan,
  nan,
  'Siege Camp',
  'Siege Camp',
  nan,
  nan,
  nan,
  nan,
  'Boss Camp',
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan],
 'Controller': ['User',
  'User',
  'User',
  'User',
  'User',
  'User',
  'User',
  'User',
  'User',
  'User',
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan],
 'Hero': [nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  'Malfurion',
  'Zarya',
  'Medic',
  'Chromie',
  'Muradin',
  'Butcher',
  'Jaina',
  'Barbarian',
  'Tychus',
  'Anubarak',
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  'Malfurion',
  'Chromie',
  'Butcher',
  'Barbarian',
  'Anubarak',
  'Zarya',
  'Medic',
  'Muradin',
  'Jaina',
  'Tychus'],
 'Map': [nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  'TowersOfDoom',
  'TowersOfDoom',
  'TowersOfDoom',
  'TowersOfDoom',
  'TowersOfDoom',
  'TowersOfDoom',
  'TowersOfDoom',
  'TowersOfDoom',
  'TowersOfDoom',
  'TowersOfDoom'],
 'PurchaseName': [nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  'MuradinCombatStyleThirdWind',
  'GenericDampenMagic',
  'ChromieSandBlastCompoundingAether',
  'ZaryaShieldAllyTogetherWeAreStrong',
  'BarbarianCombatStyleShotofFury',
  'ButcherMasteryFreshMeatAbattoir',
  'TychusPressTheAdvantage',
  'MedicFeedbackLoop',
  'JainaFrostbiteLingeringChill',
  'MalfurionMasteryMoonburn',
  'ChromieSandBlastPiercingSands',
  'ZaryaShieldAllyGiveMeTwentyQuest',
  'TychusInTheRhythm',
  'JainaFrostbiteFrostArmor',
  'MuradinMasteryThunderburn',
  'MedicBioshield',
  'GenericTalentFocusedAttack',
  'ButcherMasteryRuthlessOnslaughtUnrelentingPursuit',
  'AnubarakCombatStyleLegionOfBeetles',
  'MalfurionCombatStyleElunesGrace',
  nan,
  'ChromieDragonsBreathDragonsEye',
  'BattleMomentumMuradin',
  'ZaryaEnergyHitMe',
  'BarbarianCombatStyleFerociousHealing',
  nan,
  'AnubarakCombatStyleChitinousPlating',
  'MalfurionMasteryStranglingVinesEntanglingRoots',
  'JainaFrostbiteFrostbitten',
  'ButcherMeatShield',
  'GenericTalentCleanse',
  'TychusMasteryQuarterback',
  nan,
  'ZaryaHeroicAbilityExpulsionZone',
  'MedicHeroicAbilityStimDrone',
  'JainaHeroicRingOfFrost',
  'MuradinHeroicAbilityAvatar',
  'TychusHeroicAbilityDrakkenLaserDrill',
  'ChromieHeroicAbilitySlowingSands',
  'AnubarakHeroicAbilityCarrionSwarm',
  'BarbarianHeroicAbilityWrathoftheBerserker',
  'MalfurionHeroicAbilityTranquility',
  'ButcherHeroicAbilityButcherFurnaceBlast',
  nan,
  'TychusThatsTheStuff',
  'ZaryaPainIsTemporary',
  'MuradinMasteryThunderclapHealingStatic',
  'MedicIntensiveCare',
  'JainaBlizzardStormFront',
  nan,
  nan,
  'ChromieReachingThroughTime',
  'BarbarianMasteryMysticalSpearAncientSpear',
  'GenericTalentBurningRage',
  'MalfurionMasteryLifeSeed',
  nan,
  'TychusMasteryOverkillArmorPiercingRounds',
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan],
 'Tier 1 Choice': [nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  'MalfurionMasteryMoonburn',
  'ChromieSandBlastCompoundingAether',
  'ButcherMasteryFreshMeatAbattoir',
  'BarbarianCombatStyleShotofFury',
  'GenericDampenMagic',
  'ZaryaShieldAllyTogetherWeAreStrong',
  'MedicFeedbackLoop',
  'MuradinCombatStyleThirdWind',
  'JainaFrostbiteLingeringChill',
  'TychusPressTheAdvantage'],
 'Tier 2 Choice': [nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  'MalfurionCombatStyleElunesGrace',
  'ChromieSandBlastPiercingSands',
  'ButcherMasteryRuthlessOnslaughtUnrelentingPursuit',
  'GenericTalentFocusedAttack',
  'AnubarakCombatStyleLegionOfBeetles',
  'ZaryaShieldAllyGiveMeTwentyQuest',
  'MedicBioshield',
  'MuradinMasteryThunderburn',
  'JainaFrostbiteFrostArmor',
  'TychusInTheRhythm'],
 'Tier 3 Choice': [nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  'MalfurionMasteryStranglingVinesEntanglingRoots',
  'ChromieDragonsBreathDragonsEye',
  'ButcherMeatShield',
  'BarbarianCombatStyleFerociousHealing',
  'AnubarakCombatStyleChitinousPlating',
  'ZaryaEnergyHitMe',
  'GenericTalentCleanse',
  'BattleMomentumMuradin',
  'JainaFrostbiteFrostbitten',
  'TychusMasteryQuarterback'],
 'Tier 4 Choice': [nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  'MalfurionHeroicAbilityTranquility',
  'ChromieHeroicAbilitySlowingSands',
  'ButcherHeroicAbilityButcherFurnaceBlast',
  'BarbarianHeroicAbilityWrathoftheBerserker',
  'AnubarakHeroicAbilityCarrionSwarm',
  'ZaryaHeroicAbilityExpulsionZone',
  'MedicHeroicAbilityStimDrone',
  'MuradinHeroicAbilityAvatar',
  'JainaHeroicRingOfFrost',
  'TychusHeroicAbilityDrakkenLaserDrill'],
 'Tier 5 Choice': [nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  'MalfurionMasteryLifeSeed',
  'ChromieReachingThroughTime',
  nan,
  'BarbarianMasteryMysticalSpearAncientSpear',
  'GenericTalentBurningRage',
  'ZaryaPainIsTemporary',
  'MedicIntensiveCare',
  'MuradinMasteryThunderclapHealingStatic',
  'JainaBlizzardStormFront',
  'TychusThatsTheStuff'],
 'Tier 6 Choice': [nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  'TychusMasteryOverkillArmorPiercingRounds'],
 'ToonHandle': ['1-Hero-1-306664',
  '1-Hero-1-4692090',
  '1-Hero-1-4510262',
  '1-Hero-1-2017074',
  '1-Hero-1-410587',
  '1-Hero-1-3518305',
  '1-Hero-1-6043729',
  '1-Hero-1-909112',
  '1-Hero-1-4486077',
  '1-Hero-1-861783',
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan],
 'Win/Loss': [nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  nan,
  'Loss',
  'Loss',
  'Loss',
  'Loss',
  'Loss',
  'Win',
  'Win',
  'Win',
  'Win',
  'Win'],
 '_bits': [1032,
  1040,
  1040,
  1040,
  1032,
  1040,
  1040,
  1032,
  1040,
  1032,
  672,
  640,
  640,
  656,
  656,
  656,
  640,
  672,
  648,
  664,
  840,
  768,
  888,
  896,
  864,
  872,
  816,
  768,
  848,
  816,
  856,
  880,
  760,
  816,
  824,
  736,
  832,
  1016,
  896,
  872,
  864,
  864,
  792,
  752,
  912,
  864,
  904,
  992,
  824,
  760,
  784,
  816,
  864,
  872,
  840,
  800,
  832,
  912,
  880,
  888,
  952,
  888,
  936,
  864,
  776,
  784,
  928,
  768,
  808,
  864,
  856,
  832,
  952,
  816,
  816,
  856,
  944,
  3488,
  3408,
  3104,
  3616,
  3368,
  3248,
  2952,
  3296,
  3160,
  3656],
 '_eventid': [10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10,
  10],
 '_gameloop': [0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  0,
  50,
  50,
  58,
  58,
  66,
  66,
  74,
  74,
  82,
  82,
  135,
  166,
  251,
  487,
  490,
  506,
  737,
  908,
  945,
  1172,
  2550,
  2984,
  2998,
  3171,
  3244,
  3264,
  3570,
  3655,
  3837,
  4055,
  4603,
  4647,
  5298,
  5430,
  5515,
  5535,
  5541,
  5578,
  5604,
  5667,
  5769,
  5909,
  6958,
  7308,
  7439,
  7690,
  7709,
  7745,
  7914,
  8813,
  8817,
  8841,
  8985,
  9673,
  9885,
  9897,
  9951,
  9995,
  10024,
  10099,
  10279,
  10394,
  11891,
  11942,
  11981,
  12478,
  12527,
  12704,
  12704,
  12704,
  12704,
  12704,
  12704,
  12704,
  12704,
  12704,
  12704],
 'replayId': ['8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21',
  '8aa384650abd5e9bcc0e110ef8fc2e21']}



In [219]:

    
# Load the HOTSLogs "ReplayCharacters" CSV export and display it; the recorded
# output reports 19019240 rows x 6 columns.
# NOTE: this rebinds `df`, replacing the frame built in the earlier JSON cell.
df = pd.read_csv('/Users/JoeK/HOTSLogs Data 2015-12-30 - 2016-01-29/ReplayCharacters 2015-12-30 - 2016-01-29.csv')
df









    Out[219]:






  
    
      
      ReplayID
      Is Auto Select
      HeroID
      Hero Level
      Is Winner
      MMR Before
    
  
  
    
      0
      57010496
      False
      36
      9
      False
      2589
    
    
      1
      57010496
      False
      20
      9
      True
      2354
    
    
      2
      57010496
      False
      14
      6
      True
      2314
    
    
      3
      57010496
      False
      12
      10
      False
      2288
    
    
      4
      57010496
      False
      39
      10
      False
      2486
    
    
      5
      57010496
      False
      19
      10
      True
      2292
    
    
      6
      57010496
      False
      27
      9
      True
      2385
    
    
      7
      57010496
      False
      11
      7
      False
      2183
    
    
      8
      57010496
      False
      24
      9
      True
      2471
    
    
      9
      57010496
      False
      35
      3
      False
      2166
    
    
      10
      57010518
      False
      22
      4
      False
      2582
    
    
      11
      57010518
      False
      29
      6
      True
      2470
    
    
      12
      57010518
      False
      36
      8
      True
      2590
    
    
      13
      57010518
      False
      31
      9
      True
      2313
    
    
      14
      57010518
      False
      19
      8
      True
      2159
    
    
      15
      57010518
      False
      13
      7
      False
      1996
    
    
      16
      57010518
      False
      7
      6
      True
      2441
    
    
      17
      57010518
      False
      21
      7
      False
      2220
    
    
      18
      57010518
      False
      42
      9
      False
      2465
    
    
      19
      57010518
      False
      18
      11
      False
      2392
    
    
      20
      57368180
      False
      38
      8
      True
      2976
    
    
      21
      57368180
      False
      10
      8
      True
      2304
    
    
      22
      57368180
      False
      18
      7
      True
      1628
    
    
      23
      57368180
      False
      42
      2
      False
      2611
    
    
      24
      57368180
      False
      20
      4
      False
      2087
    
    
      25
      57368180
      False
      27
      10
      False
      2487
    
    
      26
      57368180
      False
      34
      4
      False
      2050
    
    
      27
      57368180
      False
      24
      9
      False
      1352
    
    
      28
      57368180
      False
      14
      8
      True
      1910
    
    
      29
      57368180
      False
      13
      10
      True
      2561
    
    
      ...
      ...
      ...
      ...
      ...
      ...
      ...
    
    
      19019210
      60397548
      False
      40
      12
      True
      2038
    
    
      19019211
      60397548
      False
      7
      8
      True
      2046
    
    
      19019212
      60397548
      False
      1
      15
      True
      2938
    
    
      19019213
      60397548
      False
      36
      9
      True
      2206
    
    
      19019214
      60397548
      False
      28
      4
      False
      2473
    
    
      19019215
      60397548
      False
      20
      7
      False
      2170
    
    
      19019216
      60397548
      False
      30
      6
      False
      1992
    
    
      19019217
      60397548
      False
      42
      5
      False
      2329
    
    
      19019218
      60397548
      False
      33
      9
      False
      2151
    
    
      19019219
      60397548
      False
      47
      6
      True
      1382
    
    
      19019220
      60396740
      False
      16
      7
      False
      2131
    
    
      19019221
      60396740
      False
      26
      8
      True
      2494
    
    
      19019222
      60396740
      False
      36
      10
      True
      2009
    
    
      19019223
      60396740
      False
      30
      5
      True
      2152
    
    
      19019224
      60396740
      False
      38
      4
      True
      2670
    
    
      19019225
      60396740
      False
      15
      5
      False
      2188
    
    
      19019226
      60396740
      False
      3
      6
      False
      1871
    
    
      19019227
      60396740
      True
      2
      0
      True
      2253
    
    
      19019228
      60396740
      False
      39
      3
      False
      1709
    
    
      19019229
      60396740
      False
      18
      5
      False
      1900
    
    
      19019230
      60337295
      False
      36
      9
      False
      2151
    
    
      19019231
      60337295
      False
      37
      8
      False
      2661
    
    
      19019232
      60337295
      False
      29
      11
      True
      2157
    
    
      19019233
      60337295
      False
      20
      9
      True
      2296
    
    
      19019234
      60337295
      False
      16
      5
      True
      2366
    
    
      19019235
      60337295
      False
      46
      7
      True
      2357
    
    
      19019236
      60337295
      False
      39
      8
      True
      2191
    
    
      19019237
      60337295
      False
      47
      10
      False
      2059
    
    
      19019238
      60337295
      False
      28
      10
      False
      2434
    
    
      19019239
      60337295
      False
      39
      8
      False
      2052
    
  

19019240 rows × 6 columns



In [228]:

    
def addMaxMMR(row, replayGroup):









    Out[228]:





0



In [233]:

    
# Convert the two flag columns to integers (False -> 0, True -> 1).
# astype(int) performs the cast in one vectorised step instead of calling a
# Python lambda once per row, which matters on a frame of ~19 million rows.
df['Is Winner'] = df['Is Winner'].astype(int)
df['Is Auto Select'] = df['Is Auto Select'].astype(int)



In [242]:

    
# Group the pre-match MMR values by replay and by outcome, so each group holds
# the MMRs of one side (winners or losers) of one replay.
replayGroup = df.groupby(['ReplayID', 'Is Winner'])['MMR Before']



In [245]:

    
# Highest 'MMR Before' value within each (ReplayID, Is Winner) group.
replayGroupMax = replayGroup.max()

	ReplayID	Is Auto Select	HeroID	Hero Level	Is Winner	MMR Before
0	57010496	False	36	9	False	2589
1	57010496	False	20	9	True	2354
2	57010496	False	14	6	True	2314
3	57010496	False	12	10	False	2288
4	57010496	False	39	10	False	2486
5	57010496	False	19	10	True	2292
6	57010496	False	27	9	True	2385
7	57010496	False	11	7	False	2183
8	57010496	False	24	9	True	2471
9	57010496	False	35	3	False	2166
10	57010518	False	22	4	False	2582
11	57010518	False	29	6	True	2470
12	57010518	False	36	8	True	2590
13	57010518	False	31	9	True	2313
14	57010518	False	19	8	True	2159
15	57010518	False	13	7	False	1996
16	57010518	False	7	6	True	2441
17	57010518	False	21	7	False	2220
18	57010518	False	42	9	False	2465
19	57010518	False	18	11	False	2392
20	57368180	False	38	8	True	2976
21	57368180	False	10	8	True	2304
22	57368180	False	18	7	True	1628
23	57368180	False	42	2	False	2611
24	57368180	False	20	4	False	2087
25	57368180	False	27	10	False	2487
26	57368180	False	34	4	False	2050
27	57368180	False	24	9	False	1352
28	57368180	False	14	8	True	1910
29	57368180	False	13	10	True	2561
...	...	...	...	...	...	...
19019210	60397548	False	40	12	True	2038
19019211	60397548	False	7	8	True	2046
19019212	60397548	False	1	15	True	2938
19019213	60397548	False	36	9	True	2206
19019214	60397548	False	28	4	False	2473
19019215	60397548	False	20	7	False	2170
19019216	60397548	False	30	6	False	1992
19019217	60397548	False	42	5	False	2329
19019218	60397548	False	33	9	False	2151
19019219	60397548	False	47	6	True	1382
19019220	60396740	False	16	7	False	2131
19019221	60396740	False	26	8	True	2494
19019222	60396740	False	36	10	True	2009
19019223	60396740	False	30	5	True	2152
19019224	60396740	False	38	4	True	2670
19019225	60396740	False	15	5	False	2188
19019226	60396740	False	3	6	False	1871
19019227	60396740	True	2	0	True	2253
19019228	60396740	False	39	3	False	1709
19019229	60396740	False	18	5	False	1900
19019230	60337295	False	36	9	False	2151
19019231	60337295	False	37	8	False	2661
19019232	60337295	False	29	11	True	2157
19019233	60337295	False	20	9	True	2296
19019234	60337295	False	16	5	True	2366
19019235	60337295	False	46	7	True	2357
19019236	60337295	False	39	8	True	2191
19019237	60337295	False	47	10	False	2059
19019238	60337295	False	28	10	False	2434
19019239	60337295	False	39	8	False	2052