In [ ]:
%run "../Functions/2.1 Sampling.ipynb"
# Announce which notebook is being executed.
print("3. Per session and per user analysis")
In [ ]:
# Columns kept when slicing RedMetrics events for per-session computations.
perSessionRelevantColumns = ["sessionId", "serverTime", "section"]

# Index shared by the per-checkpoint tables built in this notebook.
timedSectionsIndex = checkpointArrayStr

# Column layouts of the per-checkpoint result tables.
timedSectionsReachedColumns = ["firstReached", "firstCompletionDuration"]
timedSectionsDeathsColumns = ["deathsCount"]
eventSectionsCountColumns = ["section", "count"]
eventSectionsColumns = ["count"]
In [ ]:
## Comparison between game and Google form performance
In [ ]:
def getCheckpointsCompletionTimes( sessionId, _rmDF, defaultTime=pd.Timedelta.max):
    """Return when each checkpoint was first reached in a session, and how long it took.

    Parameters:
        sessionId: id of the session whose 'reach' events are inspected.
        _rmDF: events table with at least the columns 'type', 'sessionId',
            'serverTime' and 'section'.
        defaultTime: duration reported for checkpoints whose completion
            duration cannot be computed.

    Returns:
        DataFrame indexed by timedSectionsIndex with two columns:
        'firstReached' (earliest serverTime of a 'reach' event in that
        section, or the UTC epoch when the section was never reached) and
        'firstCompletionDuration' (difference with the previous checkpoint's
        'firstReached', or defaultTime when unknown).
    """
    # The UTC epoch is the sentinel for "never reached".
    epoch = pd.Timestamp(0, tz='utc')
    firstCheckpoint = tutorialStem + checkpointStem + "00"

    reachEvents = _rmDF[_rmDF['type']=='reach'].loc[:,perSessionRelevantColumns]
    perSession = reachEvents[reachEvents['sessionId']==sessionId]
    perSession = perSession[perSession['section'].str.startswith('tutorial', na=False)]

    timedSections = pd.DataFrame(data=0, columns=timedSectionsReachedColumns,index=timedSectionsIndex)
    timedSections['firstReached'] = epoch
    # Honour defaultTime even when the session has no reach event at all.
    timedSections['firstCompletionDuration'] = defaultTime

    if len(perSession) > 0:
        # Aligned on the checkpoint index: unreached checkpoints become NaT.
        timedSections["firstReached"] = perSession.groupby("section")["serverTime"].min()
        timedSections["firstCompletionDuration"] = timedSections["firstReached"].diff()
        # The first checkpoint has no predecessor: its duration is zero, but
        # only if it was actually reached (at this point unreached means NaT,
        # not yet the epoch sentinel).
        if pd.notnull(timedSections.loc[firstCheckpoint, "firstReached"]):
            timedSections.loc[firstCheckpoint, "firstCompletionDuration"] = pd.Timedelta(0)
        timedSections["firstReached"] = timedSections["firstReached"].fillna(epoch)
        timedSections["firstCompletionDuration"] = timedSections["firstCompletionDuration"].fillna(defaultTime)

    return timedSections
In [ ]:
def getCheckpointsCompletionTimesUser( userId, _rmDF, _sessionsList = []):
    """Merge the per-session checkpoint completion times of one user.

    For every checkpoint, the earliest 'firstReached' over all sessions is
    kept, together with the 'firstCompletionDuration' measured in that same
    session. Checkpoints never reached keep the UTC epoch and
    pd.Timedelta.max.

    Parameters:
        userId: user whose sessions are looked up when _sessionsList is empty.
        _rmDF: events table passed on to the per-session helper.
        _sessionsList: sessions to merge; when empty, getUserSessions is used.

    Returns:
        DataFrame indexed by timedSectionsIndex with the columns
        'firstReached' and 'firstCompletionDuration'.
    """
    _epoch = pd.Timestamp(0, tz='utc')

    # Fall back to the sessions associated with the user.
    if len(_sessionsList) == 0:
        _sessionsList = getUserSessions(_rmDF, userId)

    _merged = pd.DataFrame(data=0, columns=timedSectionsReachedColumns, index=timedSectionsIndex)
    _merged["firstReached"] = _epoch
    _merged["firstCompletionDuration"] = pd.Timedelta.max

    for _sessionId in _sessionsList:
        _sessionTimes = getCheckpointsCompletionTimes(_sessionId, _rmDF=_rmDF)
        for _checkpoint in _sessionTimes.index:
            _candidate = _sessionTimes.loc[_checkpoint, 'firstReached']
            # The epoch means "not reached in this session": nothing to merge.
            if _candidate == _epoch:
                continue
            _known = _merged.loc[_checkpoint, 'firstReached']
            # Keep the candidate when nothing is known yet or when it is older.
            if _known == _epoch or _known > _candidate:
                _merged.loc[_checkpoint, 'firstReached'] = _candidate
                _merged.loc[_checkpoint, 'firstCompletionDuration'] = \
                    _sessionTimes.loc[_checkpoint, 'firstCompletionDuration']

    return _merged
In [ ]:
def getCheckpointsTotalTimes( sessionId, _rmDF):
    """Return the time spent in each checkpoint during one session.

    The session's events whose section belongs to timedSectionsIndex are
    ordered by 'userTime'. Consecutive events of the same section form a run;
    a run lasts from its first event to the first event of the next run, and
    the final run lasts until the last event.

    Returns:
        Series indexed by timedSectionsIndex holding Timedelta values.
    """
    # TODO FIXME better version:
    #   - take the event type into account, especially survey-opening events
    #   - take NaN sections into account
    # otherwise, game time is added without actual play
    events = _rmDF[_rmDF["sessionId"] == sessionId]
    events = events[events['section'].isin(timedSectionsIndex)]
    events = events.loc[:, ["section", "userTime"]].sort_values(by="userTime")

    totalTimes = pd.Series(index=timedSectionsIndex, data=pd.Timedelta(0))
    if len(events) == 0:
        return totalTimes

    sections = events["section"].tolist()
    times = events["userTime"].tolist()

    runSection, runStart = sections[0], times[0]
    for currentSection, currentTime in zip(sections, times):
        if currentSection != runSection:
            # The section changed: close the previous run at this event.
            totalTimes[runSection] += currentTime - runStart
            runSection, runStart = currentSection, currentTime

    # Close the last run at the last event.
    totalTimes[runSection] += times[-1] - runStart
    return totalTimes
In [ ]:
def getCheckpointsTotalTimesUser( userId, _rmDF, _sessionsList = []):
    """Return the time a user spent in each checkpoint, summed over sessions.

    Parameters:
        userId: user whose sessions are looked up when _sessionsList is empty.
        _rmDF: events table passed on to getCheckpointsTotalTimes.
        _sessionsList: sessions to add up; when empty, getUserSessions is used.

    Returns:
        Series indexed by timedSectionsIndex holding Timedelta values.
    """
    if len(_sessionsList) == 0:
        _sessionsList = getUserSessions(_rmDF, userId)

    accumulated = pd.Series(index=timedSectionsIndex, data=pd.Timedelta(0))
    for _sessionId in _sessionsList:
        accumulated = accumulated + getCheckpointsTotalTimes(_sessionId, _rmDF)
    return accumulated
In [ ]:
def getPlayedTimeSessionMode(sessionEvents, mode, strictEvents=True, strictSection=True):
    """Return the played time and the played days of a session for one game mode.

    Parameters:
        sessionEvents: events of a single session, with the columns
            'section', 'type' and 'userTime'.
        mode: prefix that the 'section' value must start with.
        strictEvents: when True, events whose type is listed in
            noSectionEventCodes are ignored.
        strictSection: when False, events without a section are kept too.

    Returns:
        dict with 'daysSpent' (set of day bins returned by the daily grouping)
        and 'totalSpentTime' (sum over days of last minus first event time).
    """
    inMode = sessionEvents['section'].str.startswith(mode, na=(not strictSection))
    sessionEvents = sessionEvents[inMode]
    if strictEvents:
        sessionEvents = sessionEvents[~sessionEvents["type"].isin(noSectionEventCodes)]

    # Index the timestamps by themselves so that they can be binned per day.
    sessionTimes = sessionEvents['userTime'].copy()
    sessionTimes.index = sessionTimes.values

    daysSpent = set()
    totalSpentTime = pd.Timedelta(0)
    if len(sessionTimes) > 0:
        # pd.Grouper replaces the removed pd.TimeGrouper, and the list form of
        # agg replaces the removed dict-renaming form on a Series group-by.
        perDay = sessionTimes.groupby(pd.Grouper(freq='D')).agg(['min', 'max'])
        perDay.columns = ['start', 'end']
        # NOTE(review): daily bins lying between the first and the last event
        # are presumably returned even when empty, so 'daysSpent' may include
        # days without any event - confirm whether that is intended.
        daysSpent = set(perDay.index)
        totalSpentTime = (perDay['end'] - perDay['start']).sum()

    return {'daysSpent': daysSpent, 'totalSpentTime': totalSpentTime}
In [ ]:
def getPlayedTimeSession( sessionId, _rmDF):
    """Return a session's played time and played days, per game mode.

    Returns:
        dict with the keys 'tutorial' and 'sandbox', each holding the result
        of getPlayedTimeSessionMode. Events without a section are counted in
        the tutorial figures only.
    """
    ofSession = _rmDF[_rmDF['sessionId']==sessionId]
    return {
        'tutorial': getPlayedTimeSessionMode(ofSession, 'tutorial', strictSection=False),
        'sandbox': getPlayedTimeSessionMode(ofSession, 'sandbox'),
    }
In [ ]:
def mergePlayedTimes(a, b):
    """Combine two played-time dicts, game mode by game mode.

    For every game mode of a, 'totalSpentTime' values are added and
    'daysSpent' sets are united. Neither input is modified.
    """
    return {
        gameMode: {
            'totalSpentTime': a[gameMode]['totalSpentTime'] + b[gameMode]['totalSpentTime'],
            'daysSpent': a[gameMode]['daysSpent'] | b[gameMode]['daysSpent'],
        }
        for gameMode in a
    }
In [ ]:
def getPlayedTimeUser( userId, _rmDF, _sessionsList = []):
    """Return a user's played time and played days, merged over sessions.

    Parameters:
        userId: user whose sessions are looked up when _sessionsList is empty.
        _rmDF: events table passed on to getPlayedTimeSession.
        _sessionsList: sessions to merge; when empty, getUserSessions is used.

    Returns:
        dict shaped like the result of getPlayedTimeSession.
    """
    # Starting value: the figures of the '' session id, which presumably
    # matches no event and therefore yields empty totals.
    merged = getPlayedTimeSession('', _rmDF = _rmDF)

    if len(_sessionsList) == 0:
        _sessionsList = getUserSessions(_rmDF, userId)

    for session in _sessionsList:
        merged = mergePlayedTimes(merged, getPlayedTimeSession(session, _rmDF))
    return merged
In [ ]:
def getDeaths( sessionId, _rmDF):
    """Count a session's 'death' events per tutorial section.

    Returns:
        DataFrame with the columns 'section' and 'deathsCount', one row per
        section (starting with 'tutorial') in which the session has deaths.
    """
    deaths = _rmDF.loc[_rmDF['type'] == 'death', perSessionRelevantColumns]
    deaths = deaths[deaths['sessionId'] == sessionId]
    inTutorial = deaths['section'].str.startswith('tutorial', na=False)
    return deaths[inTutorial].groupby("section").size().reset_index(name='deathsCount')
In [ ]:
def getDeathsUser( userId, _rmDF):
    """Count a user's 'death' events per checkpoint, over all their sessions.

    Parameters:
        userId: user whose sessions are looked up with getUserSessions.
        _rmDF: events table passed on to getDeaths.

    Returns:
        DataFrame indexed by timedSectionsIndex with a 'deathsCount' column.

    Raises:
        KeyError: if a death happened in a tutorial section that is not part
            of timedSectionsIndex.
    """
    sessionsList = getUserSessions(_rmDF, userId)

    deathsSections = pd.DataFrame(0, columns=timedSectionsDeathsColumns, index=timedSectionsIndex)
    for sessionId in sessionsList:
        # getDeaths needs the events table as well as the session id.
        deaths = getDeaths(sessionId, _rmDF)
        for checkpointName, deathsCount in zip(deaths['section'], deaths['deathsCount']):
            # .loc writes into the frame itself; chained indexing may write
            # into a temporary copy instead.
            deathsSections.loc[checkpointName, 'deathsCount'] += deathsCount

    return deathsSections
In [ ]:
# Static data
# craftEventsColumns = pd.DataFrame(
# index=list(range(4)),
# data={
# 'eventCode' : pd.Categorical(["equip","unequip","add","remove"]),
# 'eventType' : pd.Categorical(["add","remove","add","remove"]),
# 'column' : pd.Categorical(["customData.device","customData.device","customData.biobrick","customData.biobrick"]),
# }
#)
#craftEventsColumns
In [ ]:
# Static data
# Crafting event codes used by this notebook. They differ from RedMetrics
# event types: each code maps to a RedMetrics type ('eventType') and to the
# customData column ('column') that must be filled for the event to count.
craftEventCodes = ["equip", "unequip", "add", "remove"]
craftEventsColumns = pd.DataFrame(
    {
        'eventType': pd.Categorical(["add", "remove", "add", "remove"]),
        'column': pd.Categorical([
            "customData.device",
            "customData.device",
            "customData.biobrick",
            "customData.biobrick",
        ]),
    },
    index=craftEventCodes,
)
In [ ]:
def getSectionsCraftEvents( eventCode, sessionId, _rmDF):
    """Count a session's crafting events of one code per tutorial section.

    eventCode is one of craftEventCodes (the index of craftEventsColumns), not
    a RedMetrics event type: it selects both the RedMetrics type and the
    customData column that must be non-null.

    Returns:
        DataFrame with the columns 'section' and 'count'. For an unknown
        event code a message is printed and an empty table is returned.
    """
    if eventCode not in craftEventCodes:
        print("incorrect event code '" + eventCode + "'")
        return pd.DataFrame(0, columns=eventSectionsCountColumns, index=range(0))

    redMetricsType = craftEventsColumns['eventType'][eventCode]
    payloadColumn = craftEventsColumns['column'][eventCode]

    candidates = _rmDF[_rmDF['type'] == redMetricsType]
    candidates = candidates.loc[candidates[payloadColumn].notnull(), perSessionRelevantColumns]
    ofSession = candidates[candidates['sessionId'] == sessionId]
    inTutorial = ofSession['section'].str.startswith('tutorial', na=False)
    return ofSession[inTutorial].groupby("section").size().reset_index(name='count')
In [ ]:
def getUserSectionsCraftEvents( eventCode, userId, _rmDF, sessionsList = []):
    """Count a user's crafting events of one code per checkpoint.

    eventCode is one of craftEventCodes (the index of craftEventsColumns), not
    a RedMetrics event type.

    Parameters:
        eventCode: crafting event code to count.
        userId: user whose sessions are looked up when sessionsList is empty.
        _rmDF: events table passed on to getSectionsCraftEvents.
        sessionsList: sessions to add up; when empty, getUserSessions is used.

    Returns:
        DataFrame indexed by timedSectionsIndex with a 'count' column. For an
        unknown event code a message is printed and all counts are 0.

    Raises:
        KeyError: if an event happened in a tutorial section that is not part
            of timedSectionsIndex.
    """
    userSectionsEvents = pd.DataFrame(0, columns=eventSectionsColumns, index=timedSectionsIndex)

    if eventCode not in craftEventCodes:
        print("incorrect event code '" + eventCode + "'")
        return userSectionsEvents

    if len(sessionsList) == 0:
        sessionsList = getUserSessions(_rmDF, userId)

    for sessionId in sessionsList:
        # The per-session helper needs the events table too.
        sessionSectionsEvents = getSectionsCraftEvents(eventCode, sessionId, _rmDF)
        for checkpointName, count in zip(sessionSectionsEvents['section'], sessionSectionsEvents['count']):
            # .loc writes into the frame itself; chained indexing may write
            # into a temporary copy instead.
            userSectionsEvents.loc[checkpointName, 'count'] += count

    return userSectionsEvents
In [ ]:
def getUserSectionsCraftEventsTotal( eventCode, userId, _rmDF, sessionsList = [] ):
    """Return the user's total number of crafting events of one code, over all checkpoints.

    eventCode is one of craftEventCodes, not a RedMetrics event type. The
    total is the sum of the table returned by getUserSectionsCraftEvents.
    """
    perSection = getUserSectionsCraftEvents(eventCode, userId, _rmDF, sessionsList)
    total = perSection.values.sum()
    return total
In [ ]:
def getUserCraftEventsTotal( eventCode, userId, _rmDF, sessionsList=[]):
    """Return the user's number of crafting events of one code, whatever the section.

    eventCode is one of craftEventCodes (the index of craftEventsColumns), not
    a RedMetrics event type. For an unknown code a message is printed and 0 is
    returned.
    """
    if len(sessionsList) == 0:
        sessionsList = getUserSessions(_rmDF, userId)

    if eventCode not in craftEventCodes:
        print("incorrect event code '" + eventCode + "'")
        return 0

    redMetricsType = craftEventsColumns['eventType'][eventCode]
    payloadColumn = craftEventsColumns['column'][eventCode]

    candidates = _rmDF[_rmDF['type'] == redMetricsType]
    candidates = candidates[candidates[payloadColumn].notnull()]
    return int(candidates['sessionId'].isin(sessionsList).sum())
In [ ]:
def getSectionsEvents( eventType, sessionId, _rmDF):
    """Count a session's events of one RedMetrics type per tutorial section.

    Returns:
        DataFrame with the columns 'section' and 'count', one row per section
        (starting with 'tutorial') in which the session has such events.
    """
    typed = _rmDF.loc[_rmDF['type'] == eventType, perSessionRelevantColumns]
    ofSession = typed[typed['sessionId'] == sessionId]
    inTutorial = ofSession['section'].str.startswith('tutorial', na=False)
    return ofSession[inTutorial].groupby("section").size().reset_index(name='count')
In [ ]:
def getUserSectionsEvents( eventType, userId, _rmDF, sessionsList=[]):
    """Count a user's events of one RedMetrics type per checkpoint.

    Parameters:
        eventType: RedMetrics event type to count.
        userId: user whose sessions are looked up when sessionsList is empty.
        _rmDF: events table passed on to getSectionsEvents.
        sessionsList: sessions to add up; when empty, getUserSessions is used.

    Returns:
        DataFrame indexed by timedSectionsIndex with a 'count' column.

    Raises:
        KeyError: if an event happened in a tutorial section that is not part
            of timedSectionsIndex.
    """
    if len(sessionsList) == 0:
        sessionsList = getUserSessions(_rmDF, userId)

    userSectionsEvents = pd.DataFrame(0, columns=eventSectionsColumns, index=timedSectionsIndex)
    for sessionId in sessionsList:
        # The per-session helper needs the events table too.
        sessionSectionsEvents = getSectionsEvents(eventType, sessionId, _rmDF)
        for checkpointName, count in zip(sessionSectionsEvents['section'], sessionSectionsEvents['count']):
            # .loc writes into the frame itself; chained indexing may write
            # into a temporary copy instead.
            userSectionsEvents.loc[checkpointName, 'count'] += count

    return userSectionsEvents
In [ ]:
def getUserSectionsEventsTotal( eventType, userId, _rmDF, sessionsList=[] ):
    """Return the user's total number of events of one type, over all checkpoints.

    The total is the sum of the table returned by getUserSectionsEvents.
    """
    perSection = getUserSectionsEvents(eventType, userId, _rmDF, sessionsList)
    total = perSection.values.sum()
    return total
In [ ]:
def getUserEventsTotal( eventType, userId, _rmDF, sessionsList=[]):
    """Return the user's number of events of one RedMetrics type, whatever the section.

    Parameters:
        eventType: RedMetrics event type to count.
        userId: user whose sessions are looked up when sessionsList is empty.
        _rmDF: events table with the columns 'type' and 'sessionId'.
        sessionsList: sessions to consider; when empty, getUserSessions is used.
    """
    if len(sessionsList) == 0:
        sessionsList = getUserSessions(_rmDF, userId)

    ofType = _rmDF[_rmDF['type'] == eventType]
    return int(ofType['sessionId'].isin(sessionsList).sum())
In [ ]:
def getUserCheckpoints( userId, _rmDF):
    """Return the distinct tutorial sections reached by a user.

    'reach' events of the sessions given by getUserSessions are kept when
    their section starts with 'tutorial'.

    Returns:
        Series of unique section names, in order of first appearance.
    """
    sessionsList = getUserSessions(_rmDF, userId)

    reached = _rmDF.loc[_rmDF['type'] == 'reach', perSessionRelevantColumns]
    reached = reached[reached['sessionId'].isin(sessionsList)]
    inTutorial = reached['section'].str.startswith('tutorial', na=False)
    return pd.Series(reached.loc[inTutorial, 'section'].unique())
def getDiscrepancyGameGForm( userId, _rmDF=None ):
    """Compare the checkpoints a user validated in the survey with those reached in game.

    Parameters:
        userId: user to analyse.
        _rmDF: events table passed on to getUserCheckpoints. When omitted, the
            notebook-wide rmdf1522 table is used so that the one-argument
            call form keeps working.

    Returns:
        Tuple (gameNotEnough, gformNotEnough) of Series:
        gameNotEnough holds the checkpoints validated in the survey but not
        reached in game; gformNotEnough holds the non-validated checkpoints
        that compare >= to the highest checkpoint reached in game.
    """
    if _rmDF is None:
        _rmDF = rmdf1522

    gformNonVal = getNonValidatedCheckpoints(userId)
    gformVal = getValidatedCheckpoints(userId)
    gameVal = getUserCheckpoints(userId, _rmDF)

    # The user has answered questions whose answer they have not seen in game.
    gameNotEnough = pd.Series(np.setdiff1d(gformVal.values, gameVal.values))

    # Highest checkpoint reached in game; '' compares below any checkpoint name.
    maxGameVal = ''
    if gameVal.values.size != 0:
        maxGameVal = gameVal.values.max()

    # NOTE(review): the original comment describes this list as "questions not
    # answered although their answer was seen in the game", which would rather
    # call for `nonVal <= maxGameVal` - confirm the intended direction.
    gformNotEnough = pd.Series(
        [nonVal for nonVal in gformNonVal.values if nonVal >= maxGameVal]
    )

    return (gameNotEnough, gformNotEnough)
In [ ]:
# Static data
# Event types excluded from played-time computations when strict event
# filtering is requested.
noSectionEventCodes = [
    "configure",
    "gotomooc",
    "gotostudy",
    "gotourl",
    "restart",
    "selectmenu",
    "start",
    "switch",
]
In [ ]:
# Event codes counted one by one in each user's data vector.
# possible events: complete configure craft death equip gotomooc gotourl pickup reach restart selectmenu start switch unequip
simpleEvents = [
    "complete",
    "configure",
    "craft",
    "death",
    "equip",
    "unequip",
    "add",
    "remove",
    "gotomooc",
    "gotourl",
    "pickup",
    "reach",
    "restart",
    "selectmenu",
    "start",
    "switch",
]

# Row labels of the user data vector: the session count, one score per answer
# temporality, then one count per simple event.
userDataVectorIndex = ["sessionsCount"]
for temporality in answerTemporalities:
    userDataVectorIndex += [scoreLabel + temporality]
userDataVectorIndex = np.concatenate((userDataVectorIndex, simpleEvents))
In [ ]:
#allEvents = rmdf1522['type'].unique()
#allEvents = np.concatenate( simpleEvents, allEvents ).unique()
#allUserDataVectorIndex = np.concatenate( userDataVectorIndex, allEvents ).unique()
In [ ]:
# Row labels used to select criteria in user data vectors.
overallScoreCriteria = ["scorepretest", "scoreposttest", "scoredelta"]

# "ch00" ... "ch14": one stem per checkpoint.
stemTimesCriteria = ["ch%02d" % chapter for chapter in range(15)]

# Per-checkpoint labels followed by the overall label.
completionTimesCriteria = [stem + "completion" for stem in stemTimesCriteria]
completionTimesCriteria.append("completionTime")
totalTimesCriteria = [stem + "total" for stem in stemTimesCriteria]
totalTimesCriteria.append("totalTime")
In [ ]:
def getUserDataVector(userId, _rmDF, _gfDF, _source = correctAnswers, _printDebug = True, _binary=True):
    """Assemble a single-column table of game and survey features for one user.

    Parameters:
        userId: RedMetrics user id; its string form names the returned column.
        _rmDF: RedMetrics events table handed to the per-user helpers.
        _gfDF: survey answers table.
        _source: correction source given to getBinarized / getNumeric; when it
            is empty, no per-question row is added.
        _printDebug: print a warning when survey answers are missing.
        _binary: grade questions with getBinarized (True) or getNumeric (False).

    Returns:
        DataFrame with one column named str(userId). Its rows hold the session
        count, the survey scores and their delta, one count per simple event,
        'maxChapter', 'efficiency', 'thoroughness', 'fun', the per-checkpoint
        completion and total times in seconds and, when the user answered the
        survey, the per-question pretest / posttest / delta grades.
    """
    sessionsList = getUserSessions(_rmDF, userId)

    columnName = str(userId)
    # Start from floats: the column ends up float anyway (NaN scores, log-based
    # metrics), and this avoids int -> float upcasts on assignment.
    data = pd.DataFrame(0.0, columns=[columnName], index=userDataVectorIndex)

    # Survey scores: the last score is kept for the first temporality
    # (pretest), the first score for the other temporalities.
    score = getScore(userId, _gfDF)
    for _temporality in score.columns:
        _score = score.loc[scoreLabel,_temporality]
        if len(_score) > 0:
            if _temporality == answerTemporalities[0]:
                _score = _score[len(_score)-1]
            else:
                _score = _score[0]
        else:
            _score = np.nan
        data.loc[scoreLabel+_temporality, columnName] = _score

    data.loc[scoreLabel+"delta", columnName] = \
        data.loc[scoreLabel+"posttest", columnName] - data.loc[scoreLabel+"pretest", columnName]

    data.loc['sessionsCount',columnName] = len(sessionsList)

    for eventName in simpleEvents:
        if eventName in craftEventCodes:
            data.loc[eventName,columnName] = getUserCraftEventsTotal(eventName, userId, _rmDF, sessionsList)
        else:
            data.loc[eventName,columnName] = getUserEventsTotal(eventName, userId, _rmDF, sessionsList)

    # Furthest checkpoint reached; checkpoint 00 is the floor when nothing was
    # reached. pd.concat replaces Series.append, removed from pandas.
    reachedCheckpoints = pd.concat([
        pd.Series([tutorialStem + checkpointStem + '00']),
        getUserCheckpoints(userId, _rmDF = _rmDF),
    ])
    data.loc['maxChapter', columnName] = int(reachedCheckpoints.max()[-2:])

    # time spent on each chapter
    completionTimes = getCheckpointsCompletionTimesUser(userId, _rmDF = _rmDF)
    completionTime = 0
    checkpointCompletionTime = pd.Series(dtype=float)
    for checkpoint in timedSectionsIndex:
        deltaTime = completionTimes.loc[checkpoint,"firstCompletionDuration"].total_seconds()
        checkpointCompletionTime.loc["ch" + (checkpoint[-2:]) + "completion"] = deltaTime
        completionTime += deltaTime

    # efficiency = log((1 + #unlockedchapters)/(time * (1 + #death + #craft + #add + #equip)))
    attempts = 1 \
        + data.loc['death', columnName] \
        + data.loc['craft', columnName] \
        + data.loc['add', columnName] \
        + data.loc['equip', columnName]
    data.loc['efficiency', columnName] = np.log(
        (1 + data.loc['maxChapter', columnName]) / (completionTime * attempts)
    )

    playedTime = getPlayedTimeUser(userId, _rmDF = _rmDF)

    data.loc['thoroughness', columnName] = \
        data.loc['craft', columnName] \
        * data.loc['pickup', columnName] \
        * (1 + np.power(len(playedTime['sandbox']['daysSpent']), 2))

    totalSpentTime = playedTime['tutorial']['totalSpentTime'] + playedTime['sandbox']['totalSpentTime']
    totalSpentDays = len(playedTime['tutorial']['daysSpent'] | playedTime['sandbox']['daysSpent'])

    # Clamped to 1 so that the logarithm is never negative.
    data.loc['fun', columnName] = np.log(
        max(1, totalSpentTime.total_seconds() * np.power(totalSpentDays, 2))
    )

    data.loc['completionTime', columnName] = completionTime
    for time in checkpointCompletionTime.index:
        data.loc[time,columnName] = checkpointCompletionTime.loc[time]

    totalTimes = getCheckpointsTotalTimesUser(userId, _rmDF = _rmDF)
    for checkpoint in timedSectionsIndex:
        data.loc["ch" + (checkpoint[-2:]) + "total",columnName] = totalTimes[checkpoint].total_seconds()
    data.loc["totalTime",columnName] = totalTimes.sum().total_seconds()

    # All-NaN answer standing in for a missing pretest or posttest.
    emptyAnswer = _gfDF.iloc[0].copy()
    emptyAnswer[:] = np.nan

    if len(_source) != 0:
        if hasAnswered(userId, _gfDF):
            gformLine = _gfDF[_gfDF[localplayerguidkey] == userId]
            gformLinePretest = gformLine.iloc[0]
            gformLinePosttest = gformLine.iloc[0]

            pretests = gformLine[gformLine[QTemporality] == answerTemporalities[0]]
            posttests = gformLine[gformLine[QTemporality] == answerTemporalities[1]]

            if (len(pretests) > 0) and (len(posttests) > 0):
                # take last pretest and first posttest
                # TODO add date/time checks
                gformLinePretest = pretests.iloc[-1]
                gformLinePosttest = posttests.iloc[0]
            elif len(posttests) > 0:
                if _printDebug:
                    print("warning: no pretest for u="+userId)
                gformLinePretest = emptyAnswer
                gformLinePosttest = posttests.iloc[0]
            elif len(pretests) > 0:
                if _printDebug:
                    print("warning: no posttest for u="+userId)
                # Use the last pretest, as in the complete case, rather than
                # whichever answer happens to come first.
                gformLinePretest = pretests.iloc[-1]
                gformLinePosttest = emptyAnswer
            else:
                if _printDebug:
                    print("warning: only undefined survey answers for u="+userId)
                gformLinePretest = emptyAnswer
                gformLinePosttest = emptyAnswer

            # add data from the gform: binary/numeric score on each question
            if _binary:
                gformDataPretest = getBinarized(gformLinePretest, _source = _source)
                gformDataPosttest = getBinarized(gformLinePosttest, _source = _source)
            else:
                gformDataPretest = getNumeric(gformLinePretest, _source = _source)
                gformDataPosttest = getNumeric(gformLinePosttest, _source = _source)

            gformDataDelta = gformDataPosttest - gformDataPretest

            # Three passes keep the rows grouped: pretest, posttest, delta.
            for question in gformDataPretest.index:
                data.loc[answerTemporalities[0] + " " + question,columnName] = gformDataPretest.loc[question]
            for question in gformDataPretest.index:
                data.loc[answerTemporalities[1] + " " + question,columnName] = gformDataPosttest.loc[question]
            for question in gformDataPretest.index:
                data.loc["delta " + question,columnName] = gformDataDelta.loc[question]
        else:
            if _printDebug:
                print("warning: user " + userId + " has never answered the survey")

    return data
In [ ]:
def getSessionDataPreview( _sessionId, _rmDF):
    """Summarise one session, for manual per-session analysis.

    Returns:
        dict with 'first' and 'last' (earliest and latest 'userTime' of the
        session), 'platform' (first non-null 'customData.platform', or '' when
        there is none) and 'events' (number of events per type).

    Raises:
        IndexError: if the session has no event.
    """
    sessionLogs = _rmDF[_rmDF['sessionId'] == _sessionId]
    orderedTimes = sessionLogs['userTime'].sort_values()

    knownPlatforms = sessionLogs['customData.platform'].dropna().values
    platform = knownPlatforms[0] if len(knownPlatforms) > 0 else ''

    eventCounts = sessionLogs['type'].value_counts()
    return {
        'first': orderedTimes.iloc[0],
        'last': orderedTimes.iloc[-1],
        'platform': platform,
        'events': eventCounts,
    }
In [ ]:
def getUserDataPreview(userId, _rmDF, _gfDF):
    """Build a one-column overview of a user, for manual per-user analysis.

    The table has a single column named after userId; rows are added for the
    RedMetrics part (session count, first event date, per-session platform /
    first / last / events) and for the survey part (scores, then the features
    returned by getGFormDataPreview).

    Checklist of wanted content (none ticked in the original notebook):
      RM: sessions count, first event date, time played, dates played,
          first played / last played, best chapter, counts of events
          (deaths, crafts, ...), gaming platform
      GF: score(s), progression, temporality, temporality according to
          answers (#before, #after), demographics
    """
    preview = pd.DataFrame(
        columns = [userId]
    )

    # --- RedMetrics part ---
    preview.loc['REDMETRICS ANALYSIS'] = ' '

    # sessions count
    sessions = getUserSessions(_rmDF, userId)
    preview.loc['sessions', userId] = len(sessions)

    # first event date
    preview.loc['firstEvent', userId] = getFirstEventDate( userId )

    # per session: platform, first and last event time, event counts
    sessionIds = sessions['sessionId']
    for _position, _sessionId in enumerate(sessionIds):
        _label = 'session' + str(_position)
        _sessionPreview = getSessionDataPreview(_sessionId, _rmDF = _rmDF)
        preview.loc[_label + ' platform',userId] = _sessionPreview['platform']
        preview.loc[_label + ' first',userId] = _sessionPreview['first']
        preview.loc[_label + ' last',userId] = _sessionPreview['last']
        preview.loc[_label + ' events',userId] = str(_sessionPreview['events'])

    # --- Survey part ---
    preview.loc['GFORM ANALYSIS'] = ' '

    # score(s): the last score is kept for the first temporality, the first
    # score for the other temporalities
    score = getScore(userId, _gfDF)
    for _temporality in score.columns:
        _score = score.loc[scoreLabel,_temporality]
        if len(_score) == 0:
            _score = np.nan
        elif _temporality == answerTemporalities[0]:
            _score = _score[len(_score)-1]
        else:
            _score = _score[0]
        preview.loc[scoreLabel+_temporality,userId] = _score

    preview.loc[scoreLabel+'s',userId] = str(score.values)

    # features of each survey answer, then its demographic matches
    gfDataPreview = getGFormDataPreview(userId, _gfDF)
    featureNames = ('date', 'temporality RM', 'temporality GF', 'score', 'genderAge')
    for key in gfDataPreview:
        for featureName in featureNames:
            preview.loc[key + ' ' + featureName] = str(gfDataPreview[key][featureName])

        for matchPosition, match in enumerate(gfDataPreview[key]['demographic matches']):
            preview.loc[key + ' demographic match ' + str(matchPosition)] = repr(match)

    return preview
In [ ]:
def getRecordPlayer(rmdf, gfdf):
    """Find the session with the shortest summed 'newownrecord' durations over all chapters.

    Only sessions holding a 'newownrecord' event for chapter '"10"' are
    examined, and only those whose recorded chapters equal chapterArrayStr
    compete. A session's duration is the sum of the 'customData.duration'
    values of all its 'newownrecord' events.

    Returns:
        Tuple (duration, time, age, gender, language, platform, userId,
        sessionId) describing the record session, its player and the first
        survey answer of that player.
    """
    newownrecords = rmdf[rmdf['type'] == 'newownrecord']
    candidateSessions = newownrecords.loc[newownrecords['customData.chapter'] == '"10"', 'sessionId']

    recordDuration = pd.Timedelta.max
    recordSessionId = ''
    for sessionId in candidateSessions:
        sessionRecords = newownrecords[newownrecords['sessionId'] == sessionId]
        chapters = sessionRecords['customData.chapter']
        recordedChapters = sorted(chapters[chapters.isin(chapterArrayStr)].unique())

        # Skip sessions that do not hold a record for every chapter.
        if len(recordedChapters) == 0:
            continue
        if not (recordedChapters == chapterArrayStr):
            continue

        # Durations are stored as quoted numbers of seconds.
        seconds = sum([int(raw.replace('"', '')) for raw in sessionRecords['customData.duration'].values])
        duration = pd.Timedelta(seconds=seconds)
        if duration < recordDuration:
            recordDuration = duration
            recordSessionId = sessionId

    recordEvents = rmdf[rmdf['sessionId']==recordSessionId]
    recordTime = recordEvents['userTime'].max().tz_convert('Europe/Berlin')
    recordUserId = recordEvents['userId'].unique()[0]
    recordPlatform = rmdf[rmdf['userId']==recordUserId]['customData.platform'].dropna().iloc[0]

    firstAnswer = gfdf[gfdf[QUserId] == recordUserId].iloc[0]
    recordGender = firstAnswer[QGender]
    recordAge = firstAnswer[QAge]
    recordLanguage = firstAnswer[QLanguage]

    return recordDuration, recordTime, recordAge, recordGender, recordLanguage, recordPlatform, recordUserId, recordSessionId
In [ ]:
# shortest NaN chapter newownrecord: shows that NaN doesn't mean "whole completion time"
newownrecords = rmdf1522[rmdf1522['type'] == 'newownrecord']
nanChapternewownrecords = newownrecords[newownrecords['customData.chapter'].isnull()]
#section?

# Look for the session holding the shortest chapter-less record.
recordDuration = pd.Timedelta.max
recordSessionId = ''
for sessionId in nanChapternewownrecords['sessionId']:
    sessionRecords = newownrecords[newownrecords['sessionId'] == sessionId]
    # Duration of the session's first chapter-less record, stored as a quoted
    # number of seconds.
    duration = pd.Timedelta(seconds=int(
        sessionRecords.loc[sessionRecords['customData.chapter'].isnull(), 'customData.duration']
        .values[0]
        .replace('"', '')
    ))
    if duration < recordDuration:
        recordDuration = duration
        recordSessionId = sessionId

recordDuration, recordSessionId