Per session and per user analysis

Analysis of users.

Table of Contents

  1. Preparation

  2. Function tests

  3. User metrics checks

Preparation


In [ ]:
%run "../Functions/3. Per session and per user analysis.ipynb"

In [ ]:
rmdf1522.head()

Per-session analysis


In [ ]:
# Tutorial 'reach' events logged during one hand-picked session,
# restricted to the columns used for per-session analysis.
testSessionId = "fab3ea03-6ff1-483f-a90a-74ff47d0b556"

perSession = (
    rmdf1522
    .loc[rmdf1522['type'] == 'reach', perSessionRelevantColumns]
    .loc[lambda reached: reached['sessionId'] == testSessionId]
    .loc[lambda reached: reached['section'].str.startswith('tutorial', na=False)]
)
perSession

In [ ]:
allSessions = getAllSessions( rmdf1522, True )
allSessions.head()

In [ ]:
allSessions[allSessions['sessionId']==testSessionId]

In [ ]:
allSessions[allSessions['userId']=='e2f8d5e4-cccd-4d1a-909b-c9c92f6b83c1']

Per user analysis


In [ ]:
# English-speaking user who answered the questionnaire - cf 'Google form analysis.ipynb'.
localplayerguid = '8d352896-a3f1-471c-8439-0f426df901c1'
#localplayerguid = '7037c5b2-c286-498e-9784-9a061c778609'
#localplayerguid = '5c4939b5-425b-4d19-b5d2-0384a515539e'
#localplayerguid = '7825d421-d668-4481-898a-46b51efe40f0'
#localplayerguid = 'acb9c989-b4a6-4c4d-81cc-6b5783ec71d8'
localplayerguid

In [ ]:
perUserRelevantColumns = ['sessionId', 'serverTime', 'section']

In [ ]:
sessionsList = getAllSessionsOfUser(rmdf1522, localplayerguid, True)
sessionsList

In [ ]:
# List all tutorial 'reach' events logged in any of this user's sessions.
perUser = rmdf1522[rmdf1522['type']=='reach'].loc[:,perUserRelevantColumns]
perUser = perUser[perUser['sessionId'].isin(sessionsList['sessionId'])]
perUser = perUser[perUser['section'].str.startswith('tutorial', na=False)]
# Only the last expression of a cell is rendered, so the summary must be shown
# explicitly; `display` is the function IPython makes available in notebook kernels.
display(perUser.describe())
perUser.head()

Common analysis

Switch here between users and sessions.


In [ ]:
#sectionsList = perSession
sectionsList = perUser

getCheckpointsTimes tinkering


In [ ]:
testUser = getRandomGFormGUID()
testSession = getRandomSessionGUID( _userId = testUser )

In [ ]:
timedSections1 = getCheckpointsTimes(testSession)
timedSections1

In [ ]:
sessionId = testSession
_rmDF = rmdf1522
testCounter = 0

# Inlined body of getCheckpointsTimes( sessionId, _rmDF = rmdf1522 ):
# for one session, the first server time at which each tutorial checkpoint was
# reached, and the time elapsed since the previous checkpoint was first reached.
reachEvents = _rmDF[_rmDF['type']=='reach'].loc[:,perSessionRelevantColumns]
perSession = reachEvents[reachEvents['sessionId']==sessionId]
perSession = perSession[perSession['section'].str.startswith('tutorial', na=False)]

# Sentinels: epoch 0 stands for "never reached", Timedelta.max for "never completed".
timedSections = pd.DataFrame(data=0, columns=timedSectionsReachedColumns,index=timedSectionsIndex)
timedSections['firstReached'] = pd.Timestamp(0, tz='utc')
timedSections['firstCompletionDuration'] = pd.Timedelta.max

if(len(perSession) > 0):
    # Select the column before aggregating: the result is a Series indexed by
    # section, which aligns on timedSections' index when assigned. Checkpoints
    # missing from this session become NaT.
    timedSections["firstReached"] = perSession.groupby("section")["serverTime"].min()
    timedSections["firstCompletionDuration"] = timedSections["firstReached"].diff()

    # The first checkpoint has no predecessor: its duration is zero, but only if it
    # was actually reached. At this point "not reached" is NaT (the epoch sentinel
    # was overwritten just above), so test for NaT rather than for the sentinel.
    if(pd.notnull(timedSections.loc["tutorial1.Checkpoint00","firstReached"])):
        timedSections.loc["tutorial1.Checkpoint00","firstCompletionDuration"] = \
        pd.Timedelta(0)

# Restore the sentinels for everything that is still missing.
timedSections["firstReached"] = timedSections["firstReached"].fillna(pd.Timestamp(0, tz='utc'))
timedSections["firstCompletionDuration"] = timedSections["firstCompletionDuration"].fillna(pd.Timedelta.max)

timedSections

In [ ]:
len(timedSections)

In [ ]:
# Completion duration of a single chapter; `time` keeps its '' placeholder
# when the chapter is not part of the timed sections.
chapter = "tutorial1.Checkpoint01"
time = ''
if chapter in timedSections.index:
    time = timedSections.loc[chapter,"firstCompletionDuration"]
else:
    print("no timed sections")
time

In [ ]:
timedSections1 == timedSections

In [ ]:
reachEvents.iloc[0,0]

Function tests


In [ ]:
#'7412a447-8177-48e9-82c5-cb31032f76a9': didn't answer
testUser = getRandomGFormGUID()
testResult = getUserDataVector(testUser)
print(testUser)
testResult

In [ ]:
testResult[testUser]['death']

In [ ]:
testResult = getUserDataVector('e2f8d5e4-cccd-4d1a-909b-c9c92f6b83c1')
testResult

In [ ]:
testResult = getUserDataVector('8d352896-a3f1-471c-8439-0f426df901c1')
testResult

In [ ]:
# Scratch cell: two ways of growing a collection before turning it into a Series.

# 1. Plain Python list.
gformNotEnough = list()
print(gformNotEnough)

gformNotEnough += [5]
print(gformNotEnough)

gformNotEnough = pd.Series(data=gformNotEnough)
print(gformNotEnough)

# 2. numpy array (np.append returns a new array, hence the reassignment).
gformNotEnough = np.array([])
print(gformNotEnough)

gformNotEnough = np.append(gformNotEnough, [5])
print(gformNotEnough)

gformNotEnough = pd.Series(data=gformNotEnough)
print(gformNotEnough)

# Single-checkpoint Series used as a fixture further down.
testNonVal = pd.Series(data=['tutorial1.Checkpoint13'])

getUserCheckpoints tinkering


In [ ]:
userId = getRandomRedMetricsGUID()
_rmDF = rmdf1522

# Inlined body of getUserCheckpoints( userId, _rmDF = rmdf1522 ):
# the distinct tutorial checkpoints reached by a user across all of their sessions.

# Sessions associated with the user.
sessionsList = getAllSessionsOfUser( _rmDF, userId, True )

# 'reach' events belonging to those sessions, tutorial sections only.
reachEvents = _rmDF.loc[_rmDF['type'] == 'reach', perSessionRelevantColumns]
ofUserSessions = reachEvents['sessionId'].isin(sessionsList['sessionId'].values)
perUser = reachEvents[ofUserSessions]
perUser = perUser[perUser['section'].str.startswith('tutorial', na=False)]
pd.Series(perUser['section'].unique())

getDiscrepancyGameGForm tinkering


In [ ]:
# The three inputs of the game/form discrepancy check for the current user:
# checkpoints not validated in the form, validated in the form, reached in the game.
gformNonVal = getNonValidatedCheckpoints(userId)
gformVal = getValidatedCheckpoints(userId)
gameVal = getUserCheckpoints(userId)
# One blank line between each of the three dumps.
print(gformNonVal, gformVal, gameVal, sep='\n\n')

userId = getRandomRedMetricsGUID()

# The GUID must not carry quote characters of its own, otherwise it matches no stored id.
userId = '72002481-18a1-4de2-8749-553bbabe119e'

def getDiscrepancyGameGForm( userId ):

if(hasAnswered(userId)): gformNonVal = getNonValidatedCheckpoints(userId) gformVal = getValidatedCheckpoints(userId) gameVal = getUserCheckpoints(userId)

#sorted, unique values in series1 that are not in series2
#np.setdiff1d(series1.values, series2.values)

#user has answered questions whose answer they haven't seen in the game
gameNotEnough = pd.Series(np.setdiff1d(gformVal.values, gameVal.values))

#user has not answered questions whose answer they have seen in the game
gformNotEnough = []
# '' compares lower than any checkpoint name, so it stands for "nothing reached in game".
maxGameVal = ''
if gameVal.values.size!=0:
    # Keep the highest checkpoint reached in game; the result used to be discarded,
    # which left maxGameVal at '' and let every non-validated checkpoint through.
    maxGameVal = gameVal.values.max()
# NOTE(review): the comment above suggests checkpoints *up to* maxGameVal, while the
# comparison keeps those at or beyond it - confirm the intended direction.
for nonVal in gformNonVal.values:
    if nonVal >= maxGameVal:
        gformNotEnough.append(nonVal)
gformNotEnough = pd.Series(gformNotEnough)

result = (gameNotEnough, gformNotEnough)

else: result = ([],[]) result


In [ ]:
randomguid = getRandomRedMetricsGUID()
randomguid

In [ ]:
gformNonVal = getNonValidatedCheckpoints(randomguid)
gformNonVal

In [ ]:
gformVal = getValidatedCheckpoints(randomguid)
gformVal

In [ ]:
gameVal = getUserCheckpoints( randomguid )
gameVal

sorted, unique values in series1 that are not in series2

np.setdiff1d(series1.values, series2.values)

user has answered questions whose answer they haven't seen in the game

gameNotEnough = pd.Series(np.setdiff1d(gformVal.values, gameVal.values))


In [ ]:
#user has not answered questions whose answer they have seen in the game
gformNotEnough = []
# '' compares lower than any checkpoint name, so it stands for "nothing reached in game".
maxGameVal = ''

if gameVal.values.size!=0:
    # Keep the highest checkpoint reached in game (the value used to be discarded).
    maxGameVal = gameVal.values.max()
# NOTE(review): the comment above suggests checkpoints *up to* maxGameVal, while the
# comparison keeps those at or beyond it - confirm the intended direction.
for nonVal in gformNonVal.values:
    if nonVal >= maxGameVal:
        gformNotEnough.append(nonVal)
gformNotEnough = pd.Series(gformNotEnough)

getDiscrepancyGameGForm( randomguid )


In [ ]:
test = getValidatedCheckpoints(localplayerguid)
test

maxValue = ''
if (len(test) > 0):
    maxValue = test.values.max()
maxValue

getNonValidatedCheckpoints(localplayerguid)

testlocalplayerguid = '7412a447-8177-48e9-82c5-cb31032f76a9'

test = pd.DataFrame({
                        'section' : ['tutorial1.Checkpoint00', 'tutorial1.Checkpoint01', 'tutorial1.Checkpoint02'],
                        'serverTime' : ['0', '1', '2'],
                        'firstReached' : ['0', '1', '2'],
                        'firstCompletionDuration' : ['0', '1', '2'],
                    })
test
#pd.DataFrame({  'A' : 1.,
#                'B' : pd.Timestamp('20130102'),
#                'C' : pd.Series(1,index=list(range(4)),dtype='float32'),
#                'D' : np.array([3] * 4,dtype='int32'),
#                'E' : pd.Categorical(["test","train","test","train"]),
#                'F' : 'foo' })

getCheckpointsTimesUser tinkering


In [ ]:
# incomplete game
#_userId = '958a0e85-1634-4559-bce6-d6af28b7e649' 
_userId = 'dfe8f036-8641-4d6c-8411-8a8346bb0402'
#_userId = getRandomRedMetricsGUID()
_sessionsList = []
_rmDF = rmdf1522

# Inlined body of getCheckpointsTimesUser( _userId, _sessionsList = [], _rmDF = rmdf1522 ):
# per checkpoint, the earliest time it was reached over all of the user's sessions,
# together with the completion duration recorded in that same session.

# Look the sessions up only when the caller did not provide them.
if( len(_sessionsList) == 0):
    _sessionsList = getAllSessionsOfUser( _rmDF, _userId, True )

# Start from "never reached" / "never completed" sentinels for every checkpoint.
_neverReached = pd.Timestamp(0, tz='utc')
_timedSections = pd.DataFrame(data=0, columns=timedSectionsReachedColumns,index=timedSectionsIndex)
_timedSections["firstReached"] = _neverReached
_timedSections["firstCompletionDuration"] = pd.Timedelta.max

# Merge session by session, keeping the oldest reach time of each checkpoint.
for _sessionId in _sessionsList['sessionId']:
    _thisSessionTimes = getCheckpointsTimes( _sessionId )

    for _checkpointName in _thisSessionTimes.index:
        _reachedInSession = _thisSessionTimes.loc[_checkpointName, 'firstReached']
        if _reachedInSession == _neverReached:
            # This session never got to that checkpoint: nothing to merge.
            continue

        _reachedSoFar = _timedSections.loc[_checkpointName, 'firstReached']
        if (_reachedSoFar == _neverReached) or (_reachedSoFar > _reachedInSession):
            _timedSections.loc[_checkpointName, 'firstReached'] = _reachedInSession
            _timedSections.loc[_checkpointName, 'firstCompletionDuration'] = \
                _thisSessionTimes.loc[_checkpointName, 'firstCompletionDuration']

_timedSections

print('second pass')

previous = ''
for checkpointName in thisSessionTimes.index:
    if(checkpointName != "tutorial1.Checkpoint00"):
        if( timedSections.loc[previous,"firstReached"] != pd.Timestamp(0) and timedSections.loc[checkpointName,"firstReached"] != pd.Timestamp(0) ):
            timedSections.loc[checkpointName,"firstCompletionDuration"] =\
            timedSections.loc[checkpointName,"firstReached"] - timedSections.loc[previous,"firstReached"]
    previous = checkpointName

timedSections["firstCompletionDuration"] = timedSections["firstReached"].diff()

timedSections

getPlayedTimeSession tinkering


In [ ]:
testUser = "3fe0632f-b218-41c3-adfd-27083f271c19"
testSession = getRandomSessionGUID( _userId = testUser )

# Show the events of the session that was just drawn, not of whichever
# `sessionId` an earlier cell left behind.
_rmDF[_rmDF['sessionId']==testSession]

In [ ]:
length = 1

# Every user id found in the logs, without missing-value placeholders.
# pd.notnull catches any NaN; list membership only matched the np.nan object itself.
allUserIds = np.array(rmdf1522['userId'].unique())
allUserIds = [i for i in allUserIds if pd.notnull(i) and i not in ('nan', 'null')]

# Report the users having a (randomly drawn) session whose tutorial events
# span more than one calendar day.
for user in allUserIds:
    testUser = user #getRandomGFormGUID()
    testSession = getRandomSessionGUID( _userId = testUser )
    #testUser = '8172f20e-c29b-4fda-9245-61ab05a84792'
    if testSession != '':
        sessionId = testSession
        #print(sessionId)
        _rmDF = rmdf1522

        # Inlined draft of getPlayedTimeSession( sessionId, _rmDF = rmdf1522 ):
        # first and last tutorial event time of the session for each day.
        sessionEvents = _rmDF[_rmDF['sessionId']==sessionId]
        sessionTimesTutorial = sessionEvents[sessionEvents['section'].str.startswith('tutorial', na=False)]['userTime']
        # Index the times by themselves so that they can be grouped per day.
        sessionTimesTutorial.index = sessionTimesTutorial.values
        # pd.Grouper(freq=...) replaces the removed pd.TimeGrouper; aggregating with a
        # list then renaming replaces the removed dict-renaming form of Series.agg.
        sessionTimesTutorial = (
            sessionTimesTutorial
            .groupby(pd.Grouper(freq='D'))
            .agg(['min', 'max'])
            .rename(columns={'min': 'start', 'max': 'end'})
        )
        #sessionEventsSandbox = sessionEvents[sessionEvents['section'].str.startswith('sandbox', na=False)]

        # One row per day between the first and the last event of the session.
        length = len(sessionTimesTutorial.index)
        if (length > 1):
            print("user = " + str(testUser) + " session = " + str(testSession) + " length = " + str(length))

In [ ]:
# checks

#usersWithSeveralSessions = []
#for userId in allUserIds:
#    count = countSessions(userId, False, [], rmdf1522)
#    if(count > 3):
#        usersWithSeveralSessions.append(userId)
        #print("userId="+str(userId)+"    : " + str(count))

#rmdf1522[rmdf1522['userId']=='57e2b6b7-c308-4492-9228-f753d5b3044c']['customData.platform'].unique()
#rmdf1522[rmdf1522['userId']=='57e2b6b7-c308-4492-9228-f753d5b3044c']

#userId = 'deb089c0-9be3-4b75-9b27-28963c77b10c'
#for userId in usersWithSeveralSessions:
#    print(str(userId)+" :")
#    for sessionId in getAllSessionsOfUser(rmdf1522, userId)['sessionId']:
#        print(str(sessionId)+" : " + str(getPlayedTimeSession(sessionId)))
#    print()

getPlayedTimeSessionMode tinkering


In [ ]:
testSession = "7ea5d49a-14f3-40b8-b9c4-d3d52eb0c4e1" #4
#sessionEvents = pd.DataFrame(columns=_rmDF.columns)
sessionEvents = rmdf1522[rmdf1522['sessionId']==testSession]
mode = 'tutorial'

# Inlined body of getPlayedTimeSessionMode(sessionEvents, mode):
# days on which the given game mode was played during the session, and the total
# time spent, counted per day as (last event time - first event time).
sessionTimes = sessionEvents[sessionEvents['section'].str.startswith(mode, na=False)]['userTime']
# Index the times by themselves so that they can be grouped per day.
sessionTimes.index = sessionTimes.values

# Results when the session has no event in that mode.
daysSpent = set()
totalSpentTime = pd.Timedelta(0)

if(len(sessionTimes) > 0):
    # pd.Grouper(freq=...) replaces the removed pd.TimeGrouper; aggregating with a
    # list then renaming replaces the removed dict-renaming form of Series.agg.
    sessionTimes = (
        sessionTimes
        .groupby(pd.Grouper(freq='D'))
        .agg(['min', 'max'])
        .rename(columns={'min': 'start', 'max': 'end'})
    )

    daysSpent = set(sessionTimes.index)

    sessionTimes['played'] = sessionTimes['end'] - sessionTimes['start']
    totalSpentTime = sessionTimes['played'].sum()

{'daysSpent': daysSpent, 'totalSpentTime': totalSpentTime}

In [ ]:
getPlayedTimeSessionMode(sessionEvents, 'tutorial')

In [ ]:
getPlayedTimeSessionMode(pd.DataFrame(columns=_rmDF.columns), 'tutorial')

getPlayedTimeSession tinkering


In [ ]:
#testUser = user #getRandomGFormGUID()
#testSession = getRandomSessionGUID( _userId = testUser )
#testUser = '8172f20e-c29b-4fda-9245-61ab05a84792'
#testSession = "1d16f3f2-2f76-49ee-bb37-9742ed54287a" #5 + NaT
testSession = "7ea5d49a-14f3-40b8-b9c4-d3d52eb0c4e1" #4

sessionId = testSession
#print(sessionId)
_rmDF = rmdf1522

# Returns a given session's total playtime and day count
#def getPlayedTimeSession( sessionId, _rmDF = rmdf1522 ):
sessionEvents = _rmDF[_rmDF['sessionId']==sessionId]
tutorialTime = getPlayedTimeSessionMode(sessionEvents, 'tutorial')
sandboxTime = getPlayedTimeSessionMode(sessionEvents, 'sandbox')
{'tutorial': tutorialTime, 'sandbox': sandboxTime}

In [ ]:
getPlayedTimeSession('', _rmDF = _rmDF)

mergePlayedTimes tinkering and test


In [ ]:
a = getPlayedTimeSession("054a96ca-c2f1-4967-9b77-6ce4c33c9d33")
b = getPlayedTimeSession("e5421d6c-2f55-4279-8d82-bbafbe16d635")
a,b

In [ ]:
def _consecutiveDays(firstDay, count):
    """Return the set of `count` consecutive daily pd.Timestamp values starting at `firstDay`."""
    # Built through date_range because pd.Timestamp no longer accepts a `freq`
    # argument; the resulting timestamps compare and hash like the hand-written ones.
    return set(pd.date_range(firstDay, periods=count, freq='D'))

# Two hand-made per-session results, shaped like the output of getPlayedTimeSession,
# with overlapping days so that merging them can be checked by eye.
c = {
    'sandbox': {
        'daysSpent': _consecutiveDays('2017-06-07', 5),    # 2017-06-07 .. 2017-06-11
        'totalSpentTime': pd.Timedelta('0 days 00:09:34.662000'),
    },
    'tutorial': {
        'daysSpent': _consecutiveDays('2017-06-07', 6),    # 2017-06-07 .. 2017-06-12
        'totalSpentTime': pd.Timedelta('0 days 00:00:11.007000'),
    },
}
d = {
    'sandbox': {
        'daysSpent': _consecutiveDays('2017-06-06', 5),    # 2017-06-06 .. 2017-06-10
        'totalSpentTime': pd.Timedelta('0 days 00:09:34.662000'),
    },
    'tutorial': {
        'daysSpent': _consecutiveDays('2017-06-05', 6),    # 2017-06-05 .. 2017-06-10
        'totalSpentTime': pd.Timedelta('0 days 00:00:11.007000'),
    },
}

# Union of the tutorial days of both fixtures: 2017-06-05 .. 2017-06-12.
c['tutorial']['daysSpent'] | d['tutorial']['daysSpent']

In [ ]:
#a = getPlayedTimeSession("054a96ca-c2f1-4967-9b77-6ce4c33c9d33")
#b = getPlayedTimeSession("e5421d6c-2f55-4279-8d82-bbafbe16d635")
a = c
b = d

#print(a['sandbox']['daysSpent'], a['sandbox']['totalSpentTime'],\
#a['tutorial']['daysSpent'], a['tutorial']['totalSpentTime'],\
#b['sandbox']['daysSpent'], b['sandbox']['totalSpentTime'],\
#b['tutorial']['daysSpent'], b['tutorial']['totalSpentTime'])

#print(a,b)

# Inlined body of mergePlayedTimes(a, b): per game mode, add the played times
# and take the union of the days played.
result = a.copy()
for gameMode in a:
    result[gameMode] = {
        'totalSpentTime': a[gameMode]['totalSpentTime'] + b[gameMode]['totalSpentTime'],
        # A set union is already free of duplicates. Wrapping it in np.unique would
        # produce a one-element object array holding the set, so len() would report 1
        # instead of the number of days.
        'daysSpent': a[gameMode]['daysSpent'] | b[gameMode]['daysSpent'],
    }
result

getPlayedTimeUser tinkering


In [ ]:
#userId = 'ae72a4cb-244e-475c-80ea-11a410266645'
userId = '6bc0f58c-26ed-4be9-9596-2a9ad8d11d67'
_sessionsList = []
_rmDF = rmdf1522

# Returns a given user's total playtime and day count
#def getPlayedTimeUser( userId, _sessionsList = [], _rmDF = rmdf1522 ):
result = getPlayedTimeSession('', _rmDF = _rmDF)

if(len(_sessionsList) == 0):
    _sessionsList = getAllSessionsOfUser(_rmDF, userId)
for session in _sessionsList['sessionId']:
#for session in ["e5421d6c-2f55-4279-8d82-bbafbe16d635","e5421d6c-2f55-4279-8d82-bbafbe16d635","e5421d6c-2f55-4279-8d82-bbafbe16d635"]:
    playedTimes = getPlayedTimeSession(session, _rmDF)
    result = mergePlayedTimes(result, playedTimes)
    
result

getDeaths tinkering


In [ ]:
sessionId = "fab3ea03-6ff1-483f-a90a-74ff47d0b556"
_rmDF = rmdf1522

# Inlined body of getDeaths( sessionId, _rmDF = rmdf1522 ):
# number of 'death' events per tutorial section for one session.
deathEvents = _rmDF.loc[_rmDF['type'] == 'death', perSessionRelevantColumns]
perSession = deathEvents[deathEvents['sessionId']==sessionId]
perSession = perSession[perSession['section'].str.startswith('tutorial', na=False)]
# One row per section, with the count in a 'deathsCount' column.
deathsPerSection = perSession.groupby("section").size()
deathsSections = deathsPerSection.reset_index(name='deathsCount')
deathsSections

getDeathsUser tinkering


In [ ]:
userId = 'ae72a4cb-244e-475c-80ea-11a410266645'
_rmDF = rmdf1522

# Inlined body of getDeathsUser( userId, _rmDF = rmdf1522 ):
# deaths per checkpoint, summed over all of the user's sessions.

# List of associated sessions
sessionsList = getAllSessionsOfUser( _rmDF, userId, True )

# Start with zero deaths for every checkpoint, then add each session's counts.
deathsSections = pd.DataFrame(0, columns=timedSectionsDeathsColumns,index=timedSectionsIndex)

for sessionId in sessionsList['sessionId']:
    deaths = getDeaths( sessionId )

    # getDeaths yields one row per section ('section', 'deathsCount').
    for checkpointName, sessionDeaths in zip(deaths['section'], deaths['deathsCount']):
        # Single .loc assignment: the chained form df[col][row] = ... may write to a
        # temporary copy and leave deathsSections unchanged.
        deathsSections.loc[checkpointName, 'deathsCount'] += sessionDeaths

deathsSections

getUserCraftEventsTotal tinkering


In [ ]:
# craftEventCodes = list(["equip","unequip","add","remove"])
eventCode = 'equip'
userId = getRandomRedMetricsGUID()
sessionsList=[]
_rmDF = rmdf1522

# Inlined body of getUserCraftEventsTotal( eventCode, userId, sessionsList=[], _rmDF = rmdf1522 ):
# number of crafting events of the given kind over all of the user's sessions.
if(len(sessionsList) == 0):
    sessionsList = getAllSessionsOfUser( _rmDF, userId, True )

result = 0

if eventCode not in craftEventCodes:
    print("incorrect event code '" + eventCode + "'")
else:
    # A craft event is identified by its event type plus a non-null value
    # in the column dedicated to that event code.
    eventType = craftEventsColumns['eventType'][eventCode]
    events = _rmDF[_rmDF['type']==eventType]
    hasCraftData = events[craftEventsColumns['column'][eventCode]].notnull()
    events = events[hasCraftData]
    perSession = events[events['sessionId'].isin(sessionsList['sessionId'])]
    result = len(perSession)
result, userId

In [ ]:

getUserEventsTotal tinkering


In [ ]:
eventType = 'death'
#userId = 'e2f8d5e4-cccd-4d1a-909b-c9c92f6b83c1'
userId = getRandomRedMetricsGUID()
sessionsList=[]
_rmDF = rmdf1522

#def getUserEventsTotal( eventType, userId, sessionsList=[], _rmDF = rmdf1522 ):
if(len(sessionsList) == 0):
    sessionsList = getAllSessionsOfUser( _rmDF, userId, True )

sessionEvents = _rmDF[_rmDF['type']==eventType]
perSession = sessionEvents[sessionEvents['sessionId'].isin(sessionsList['sessionId'])]
len(perSession)

getSessionDataPreview tinkering


In [ ]:
userId = getSurveysOfBiologists(gform)[localplayerguidkey].iloc[2]
#sample = gform[gform[localplayerguidkey] == userId]

In [ ]:
# Number of events of each type logged in one session.
# NOTE(review): `_sessionId` is not set in this section yet - this cell reads whatever
# an earlier cell left in it (the next cell is the one that assigns it); confirm the order.
_rmDF[_rmDF['sessionId'] == _sessionId]['type'].value_counts()

In [ ]:
_rmDF = rmdf1522
sessions = getAllSessionsOfUser( _rmDF, userId, True )
_sessionId = sessions['sessionId'].iloc[0]

# for per-session, manual analysis
# Inlined body of getSessionDataPreview( _sessionId, _rmDF ):
# first and last event time, platform and per-type event counts of one session.
_logs = _rmDF[_rmDF['sessionId'] == _sessionId]

_timedEvents = _logs['userTime'].sort_values()

# The platform is the first non-null value logged, '' when none was logged.
_platform = _logs['customData.platform'].dropna().values
_platform = _platform[0] if len(_platform) > 0 else ''

_events = _logs['type'].value_counts()
result = dict(
    first = _timedEvents.iloc[0],
    last = _timedEvents.iloc[-1],
    platform = _platform,
    events = _events,
)
print(result)

In [ ]:
# Pick each entry by key: `result` was built in the order first, last, platform,
# events, so unpacking result.values() positionally would mix the four values up.
events, first, last, platform = (result[key] for key in ('events', 'first', 'last', 'platform'))

In [ ]:
first, last, platform, events

getUserDataPreview tinkering


In [ ]:
userId = getSurveysOfBiologists(gform)[localplayerguidkey].iloc[2]
#sample = gform[gform[localplayerguidkey] == userId]

In [ ]:
events, first, last, platform

In [ ]:
events

In [ ]:
sdp = getSessionDataPreview(_sessionId, _rmDF = _rmDF)

In [ ]:
sdp

In [ ]:
#userId = getRandomGFormGUID()
_rmDF = rmdf1522
scoreLabel = 'score'

# for per-user, manual analysis
#def getUserDataPreview( userId, _rmDF = rmdf1522 ):

result = pd.DataFrame(
        columns = [userId]
    )

#    [ ] RM
result.loc['REDMETRICS ANALYSIS'] = ' '
#      [ ] sessions count
sessions = getAllSessionsOfUser( _rmDF, userId, True )
result.loc['sessions', userId] = len(sessions)
#      [ ] first event date
result.loc['firstEvent', userId] = getFirstEventDate( userId )
#      [ ] time played
#      [ ] dates played
#      [ ] first played, last played
# One group of rows per session: platform, first and last event time, event counts.
sessionIds = sessions['sessionId']
for _sessionIdIndex, _sessionId in enumerate(sessionIds):
    sdp = getSessionDataPreview(_sessionId, _rmDF = _rmDF)

    _rowPrefix = 'session' + str(_sessionIdIndex) + ' '
    for _feature in ('platform', 'first', 'last'):
        result.loc[_rowPrefix + _feature,userId] = sdp[_feature]
    # The counts are stored as text so that they fit in a single cell.
    result.loc[_rowPrefix + 'events',userId] = str(sdp['events'])
#      [ ] best chapter
#      [ ] counts of events: deaths, crafts,...

#    [ ] GF
result.loc['GFORM ANALYSIS'] = ' '
#      [ ] score(s)
# One score row per temporality: the last entry for the first temporality
# of answerTemporalities, the first entry for the others, NaN when there is none.
score = getScore( userId )
for _temporality in score.columns:
    _score = score.loc[scoreLabel,_temporality]
    if len(_score) == 0:
        _score = np.nan
    elif _temporality == answerTemporalities[0]:
        _score = _score[len(_score)-1]
    else:
        _score = _score[0]
    result.loc[scoreLabel+_temporality,userId] = _score
#        [ ] progression
#      [ ] demographics
result.loc[scoreLabel+'s',userId] = str(score.values)

gfDataPreview = getGFormDataPreview(userId, gform)
features = {1: 'date', 2: 'temporality RM', 3: 'temporality GF', 4: 'score', 5: 'genderAge'}
for key in gfDataPreview:
    for featureKey in features:
        result.loc[key + ' ' + features[featureKey]] = str(gfDataPreview[key][features[featureKey]])
    index = 0
    for match in gfDataPreview[key]['demographic matches']:
        result.loc[key + ' demographic match ' + str(index)] = repr(match)
        index += 1
     
result

In [ ]:
answerTemporalities

In [ ]:
#getUserDataPreview(undefinedId)

In [ ]:
for undefinedId in gform[gform[QTemporality] == answerTemporalities[2]][localplayerguidkey]:
    getUserDataPreview(undefinedId)

Checks on user metrics

Sequence of actions

sandbox
crafting
equip device
unequip device
add PCONS
add 6
add Ampicillin
add T
> auto craft
> auto equip
remove T
> auto unequip
add T
> auto equip
add 12
> auto craft
> auto equip
add 6
> auto equip
exit crafting
dies
> auto unequip
set language to english

In [ ]:
rdfcrafttest = pd.read_csv("../../data/2017-10-10.craft-test.csv")
rdfcrafttest = getNormalizedRedMetricsCSV(rdfcrafttest)
rdfcrafttest

In [ ]:
craftEventsColumns

In [ ]:
craftEventsColumns['column']['equip']

In [ ]:
type(craftEventCodes)

In [ ]:
test = np.unique(np.concatenate((perSessionRelevantColumns, [craftEventsColumns['column']['equip']])))
test

In [ ]:
# user 344 adds
#'e2f8d5e4-cccd-4d1a-909b-c9c92f6b83c1'
# one of its sessions
# fab3ea03-6ff1-483f-a90a-74ff47d0b556
#
# user 22 adds
#'8d352896-a3f1-471c-8439-0f426df901c1'
#

# session test
craftSessionTest = getSectionsCraftEvents('equip', "fab3ea03-6ff1-483f-a90a-74ff47d0b556")
# user test
craftUserTest = getUserSectionsCraftEvents('equip', 'e2f8d5e4-cccd-4d1a-909b-c9c92f6b83c1')
# user count test
craftUserTestCount = getUserSectionsCraftEventsTotal('equip', 'e2f8d5e4-cccd-4d1a-909b-c9c92f6b83c1')
craftUserTestCount

In [ ]:
print("craftSessionTest=" + str(craftSessionTest))
print("craftUserTest=" + str(craftUserTest))
print("craftUserTestCount=" + str(craftUserTestCount))

In [ ]:
columnName = craftEventsColumns['column']['equip']
columnName

In [ ]:
# Every non-null value of the craft column, in log order.
result = [entry for entry in rmdf1522[columnName] if not pd.isnull(entry)]
result

In [ ]:
#rmdf1522[columnName].notnull()

In [ ]:
sectionsEvents = pd.DataFrame(0, columns=eventSectionsCountColumns, index=range(0))
sectionsEvents

In [ ]:
#events = rmdf1522[rmdf1522['type']==eventType and not rmdf1522[craftEventsColumns['column'][eventCode]].isnull()].loc[:,perSessionRelevantColumns]

In [ ]:

getUserDataVector tinkering


In [ ]:
testUser = getRandomGFormGUID()
print(testUser)
#testResult = getUserDataVector(testUser)
#testResult

In [ ]:
userId = getRandomGFormGUID()
#userId = '1f27519a-971f-4e39-bac7-9920bfc4b05b' #undefined temporality
#userId = 'e2f8d5e4-cccd-4d1a-909b-c9c92f6b83c1' #has not answered
print(userId)
_source = correctAnswers
_rmDF = rmdf1522

#def getUserDataVector( userId, _source = [], _rmDF = rmdf1522 ):
sessionsList = getAllSessionsOfUser( _rmDF, userId, True )

columnName = str(userId)

data = pd.DataFrame(0, columns=[columnName],index=userDataVectorIndex)

score = getScore( userId )
for _temporality in score.columns:
    _score = score.loc[scoreLabel,_temporality]
    if(len(_score)>0):
        if(_temporality == answerTemporalities[0]):
            _score = _score[len(_score)-1]
        else:
            _score = _score[0]
    else:
        _score = np.nan
    data.loc[scoreLabel+_temporality,columnName] = _score

data.loc['sessionsCount',columnName] = countSessions( userId, False, sessionsList, _rmDF = _rmDF)

for eventName in simpleEvents:
    if eventName in craftEventCodes:
        data.loc[eventName,columnName] = getUserCraftEventsTotal(eventName, userId, sessionsList)
    else:
        data.loc[eventName,columnName] = getUserEventsTotal(eventName, userId, sessionsList)

# Highest chapter reached, read from the last two characters of the checkpoint name.
# 'tutorial1.Checkpoint00' is added so that max() has something to return for a
# user without any checkpoint. pd.concat is used because Series.append no longer
# exists in recent pandas.
data.loc['maxChapter', columnName] = int(
    pd.concat([
        pd.Series(['tutorial1.Checkpoint00']),
        getUserCheckpoints(userId, _rmDF = _rmDF),
    ]).max()[-2:]
)

# time spent on each chapter, in seconds, keyed by chapter number,
# and their sum over all chapters
times = getCheckpointsTimesUser(userId)

completionTime = 0
# Explicit dtype: an empty Series without one warns or defaults to object,
# depending on the pandas version.
chapterTime = pd.Series(dtype='float64')
for chapter in timedSectionsIndex:
    deltaTime = times.loc[chapter,"firstCompletionDuration"].total_seconds()
    # The chapter number is the last two characters of the checkpoint name.
    chapterTime.loc[int(chapter[-2:])] = deltaTime
    completionTime += deltaTime

# efficiency = (1 + #unlockedchapters)/(time * (1 + #death + #craft + #add + #equip))
# The stored value is the natural logarithm of that ratio; `time` is completionTime,
# the sum of the per-chapter durations accumulated in the loop above.
# NOTE(review): a completionTime of 0 makes the ratio a division by zero - confirm
# whether that can happen for users with no timed chapter.
data.loc['efficiency', columnName] = np.log(( 1 + data.loc['maxChapter', columnName] ) / \
                                    (completionTime \
                                     * ( 1\
                                        + data.loc['death', columnName] \
                                        + data.loc['craft', columnName]\
                                        + data.loc['add', columnName]\
                                        + data.loc['equip', columnName]\
                                       )\
                                    ))

playedTime = getPlayedTimeUser(userId, _rmDF = _rmDF)

data.loc['thoroughness', columnName] = \
data.loc['craft', columnName]\
* data.loc['pickup', columnName]\
* ( 1 + np.power(len(playedTime['sandbox']['daysSpent']),2))

totalSpentTime = playedTime['tutorial']['totalSpentTime'] + playedTime['sandbox']['totalSpentTime']
totalSpentDays = len(playedTime['tutorial']['daysSpent'] | playedTime['sandbox']['daysSpent'])
data.loc['fun', columnName] = np.log(\
                                max(1,\
                                    totalSpentTime.total_seconds()
                                    * np.power(totalSpentDays,2)
                                   ))

data.loc['completionTime', columnName] = completionTime
for time in chapterTime.index:
    data.loc[time,columnName] = chapterTime.loc[time]

# Survey part of the vector, only when a source of correct answers is given.
if(len(_source) != 0):
    if not hasAnswered(userId):
        print("warning: user " + userId + " has never answered the survey")
    else:
        # Pick one survey line for the user: the first answer of temporality
        # answerTemporalities[1] if any, else the last of answerTemporalities[0],
        # else the last line whatever its temporality.
        gformLine = gform[gform[localplayerguidkey] == userId]
        afters = gformLine[gformLine[QTemporality] == answerTemporalities[1]]
        if(len(afters) > 0):
            gformLine = afters.iloc[0]
        else:
            befores = gformLine[gformLine[QTemporality] == answerTemporalities[0]]
            gformLine = befores.iloc[-1] if len(befores) > 0 else gformLine.iloc[-1]

        # add data from the gform: binary score on each question
        gformData = getBinarized(gformLine, _source = _source)

        for question in gformData.index:
            data.loc[question,columnName] = gformData.loc[question]
        
print(str(data))

In [ ]:
max((1,2))

In [ ]:
max(1,(totalSpentTime.total_seconds()* np.power(totalSpentDays,2)))

In [ ]:
data.loc['fun', columnName] = np.log(max(1,totalSpentTime.total_seconds()* np.power(totalSpentDays,2)))

In [ ]:
#testUID = "bfdfd356-5d6f-4696-a2f1-c1dc338aa64b" # sessionsCount == 4
userId = getRandomGFormGUID()
getUserDataVector(userId)

In [ ]:
sessionsCounts = getUserSessionsCounts(rmdf1522)
playersResponders = sessionsCounts[sessionsCounts['userId'].isin(getAllResponders())]
len(sessionsCounts), len(playersResponders)

In [ ]:
playersResponders

In [ ]:
testUID = playersResponders[playersResponders['counts']==2]['userId'].values[0]
answerTimestamps = gform[gform[localplayerguidkey] == testUID][QTimestamp]

Making sense of temporality of answers of multi-session users

What is the behaviour of users who played multiple times?


In [ ]:
import pytz, datetime
local = pytz.timezone ("Europe/Berlin")

# For every responder who has more than one session, print each session's first and
# last event time and tell, for each of the user's survey answers, whether it was
# submitted before the session, after it, or in between.
sample = getAllResponders()

for userId in sample:

    sessions = getAllSessionsOfUser(rmdf1522,userId)

    if(len(sessions) > 1):

        print("------------------user " + userId + " ------------------")
        print()
        answerTimestamps = gform[gform[localplayerguidkey] == userId][QTimestamp]

        for sessionIndex in sessions.index:
            sessionId = sessions.loc[sessionIndex, 'sessionId']

            # Events of the session that have a section, ordered by user time.
            _logs = rmdf1522[rmdf1522['sessionId'] == sessionId]
            _logs = _logs[_logs.index.isin(_logs['section'].dropna().index)]
            _timedEvents = _logs['userTime']
            _timedEvents = _timedEvents.sort_values()

            # First and last event by position: after sorting, the index labels are no
            # longer in order, so [0] / [-1] would look labels up instead of positions.
            _hasEvents = len(_timedEvents) > 0
            if _hasEvents:
                _firstEvent = _timedEvents.iloc[0]
                _lastEvent = _timedEvents.iloc[-1]

            print("session " + str(sessionIndex))
            if _hasEvents:
                print("\tstart: " + str(_firstEvent))
                print("\tend: " + str(_lastEvent))
            print()

            for answerTimestampIndex in answerTimestamps.index:
                survey = answerTimestamps.loc[answerTimestampIndex]
                # Compare in UTC.
                utc_dt = survey.astimezone (pytz.utc)

                print("\tsurvey" + str(answerTimestampIndex))
                print("\t" + str(utc_dt))

                if _hasEvents:
                    if((_firstEvent > utc_dt) and (_lastEvent > utc_dt)):
                        print("\tanswered before playing")
                    elif((_firstEvent < utc_dt) and (_lastEvent < utc_dt)):
                        print("\tanswered after playing")
                    else:
                        print("\tundefined: overlap")
                    print("\t" + str((_firstEvent > utc_dt, _lastEvent > utc_dt)))
                else:
                    print("\tundefined: no event")

                print()
                print()

            print()
            print()
            print()

In [ ]:
_logs = rmdf1522[rmdf1522['sessionId'] == sessionId][['type', 'userTime', 'section']].values[0]
_logs

In [ ]:
# First and last event by position (.iloc), since the sorted Series keeps its original labels.
_timedEvents.iloc[0], _timedEvents.iloc[-1], survey

In [ ]:
# Was the survey answered before the first / before the last event? (positional access)
survey < _timedEvents.iloc[0], survey < _timedEvents.iloc[-1]

In [ ]:
# Did the session start before and end after the survey? (positional access)
str((_timedEvents.iloc[0] < survey, _timedEvents.iloc[-1] > survey))

In [ ]:
times

In [ ]:
eventName

In [ ]:
getUserSectionsEvents( 'start', userId, sessionsList )

In [ ]:
perSession = perSession[perSession['section'].str.startswith('tutorial', na=False)]