In [9]:
%matplotlib inline
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
In [10]:
# Load one raw event log per dataset. Each CSV is read with pandas defaults.
# df132 is read from the 2016-01-26 export; "data/1.32.csv" was the
# previously used source for that version.
df100, df131, df132, dftest = (
    pd.read_csv(csvPath)
    for csvPath in (
        "data/1.0.csv",
        "data/1.31.csv",
        "data/2016-01-26-hc-1.32.csv",
        "data/2016-01-26-hc-test.csv",
    )
)
In [11]:
# Graph-mode switches. Only defaultGraphMode is enabled here.
# NOTE(review): none of these flags is read in the visible cells of this
# notebook; presumably plotUtilities.ipynb consumes them -- confirm.
percentageBarGraphMode = barGraphMode = percentageDefaultGraphMode = False
defaultGraphMode = True
In [12]:
# Columns kept from every dataset for the rest of the analysis.
relevantColumns = ['id', 'serverTime', 'playerId', 'type', 'section']

# Restrict each dataset to those columns. The 1.0 dataset (df100) is
# deliberately left out of the combined table.
part131, part132, parttest = (
    versionData.loc[:, relevantColumns]
    for versionData in (df131, df132, dftest)
)

# Single table with the events of all retained datasets; the row index of
# each source frame is kept as is (no re-indexing on concat).
df = pd.concat([part131, part132, parttest])

# Cell output: the five playerIds with the most "death" events.
(
    df.loc[df["type"] == "death", "playerId"]
    .value_counts()
    .sort_values(ascending=False)
    .head()
)
Out[12]:
Generic graph treatment
In [13]:
# Execute the companion notebook plotUtilities.ipynb.
# NOTE(review): genericTreatment(), called by the cells below, is not defined
# in this notebook; presumably it is provided by this run -- confirm.
%run plotUtilities.ipynb
In [14]:
# Per-event statistics: for every studied event type, print the total and
# mean number of occurrences, then graph the per-session distribution twice:
# once over the sessions that had the event, once over all sessions.

# Each distinct playerId is treated as one game session.
gameSessionCount = df["playerId"].nunique()
print('gameSessionCount={0}'.format(gameSessionCount))

# Event types to study and the label used for each one on the graphs.
events = ['death', 'pickup', 'equip', 'craft', 'complete', 'switch', 'reach']
graphLabels = pd.Series(['deaths', 'picked up devices', 'equipped devices',
                         'crafted devices', 'completed games', 'switched modes',
                         'reached checkpoints'], index=events)
# legacy: other "type" values that are counted as the same event
alternates = pd.Series([[], [], [], [], ['completed'], ['switched:'], ['reached']],
                       index=events)

for event in events:
    studiedEvent = event
    columnName = studiedEvent + "s"
    graphLabel = graphLabels[event]

    # Rows logged under the event's type or under one of its legacy types.
    # Selecting them in one pass keeps the total count and the per-session
    # counts consistent: a session that has both the current and a legacy
    # type gets the sum of both, not just the current type's count.
    studiedEventRows = df[df["type"].isin([studiedEvent] + alternates[studiedEvent])]

    studiedEventCount = studiedEventRows["id"].count()
    print('#{0}={1}'.format(studiedEvent, studiedEventCount))
    mean = studiedEventCount / float(gameSessionCount)
    print('mean#{0}={1}'.format(studiedEvent, mean))

    # Occurrences per session, restricted to sessions with at least one
    # occurrence; value_counts() already returns them in descending order.
    positiveGameSessions = studiedEventRows["playerId"].value_counts()
    positiveGameSessionCount = positiveGameSessions.count()
    positiveGameSessionCountTable = pd.DataFrame({columnName: positiveGameSessions.values})
    genericTreatment(positiveGameSessionCountTable, "game sessions", graphLabel,
                     positiveGameSessionCount)

    # Add one zero row for every session that never had the event, so that
    # the full table has exactly gameSessionCount rows. The range end is
    # exclusive, hence gameSessionCount and not gameSessionCount - 1.
    negativeGameSessionCountSeries = pd.Series(
        0, index=list(range(positiveGameSessionCount, gameSessionCount)))
    negativeGameSessionCountTable = pd.DataFrame({columnName: negativeGameSessionCountSeries})
    fullEventGameSessionCountTable = pd.concat(
        [positiveGameSessionCountTable, negativeGameSessionCountTable])
    genericTreatment(fullEventGameSessionCountTable, "game sessions", graphLabel,
                     gameSessionCount)
In [15]:
# Distribution of the number of logged rows per playerId.
eventsLabel = "events"
eventstable = pd.DataFrame({"events": df["playerId"].value_counts().values})

# Despite its name, this is the number of rows of eventstable, i.e. the
# number of distinct playerId values, not the number of logged events.
totalEventsCount = int(eventstable["events"].count())
genericTreatment(eventstable, "game sessions", eventsLabel, totalEventsCount)
print(totalEventsCount)

# Same row count, printed separately for the 1.31 and 1.32 datasets.
for versionFrame in (df131, df132):
    print(pd.DataFrame({"events": versionFrame["playerId"].value_counts().values}).count())
In [16]:
# Checkpoint statistics: how many "reach" events were logged per section,
# graphed relative to the most reached section.

# Only the 1.32 and test datasets are used here (dftest or df132 alone are
# the alternatives). Legacy "reached" rows are not merged in this cell.
checkpointsDF = pd.concat([part132, parttest])
checkpointsGameSessionCount = checkpointsDF["playerId"].nunique()

reachEvents = checkpointsDF[checkpointsDF["type"] == "reach"]

# Number of "reach" events per section, plus an empty 'Checkpoint00'
# placeholder entry, ordered by section name.
checkpoints = reachEvents["section"].value_counts()
checkpoints['Checkpoint00'] = None
checkpoints = checkpoints.sort_index()
totalCheckpointsCount = checkpoints.sum()
print('\ncheckpoints=\n{0}'.format(checkpoints))
print('\ntotalCheckpointsCount={0}'.format(totalCheckpointsCount))

# Sessions that reached at least one checkpoint, and those that never did.
reachedGameSessions = reachEvents["playerId"].value_counts()
reachedGameSessionCount = reachedGameSessions.count()
print('\nreachedGameSessionCount={0}'.format(reachedGameSessionCount))
neverReachedGameSessionCount = checkpointsGameSessionCount - reachedGameSessionCount
print('\nneverReachedGameSessionCount={0}'.format(neverReachedGameSessionCount))

# Series.max() skips the empty placeholder entry; taking the max of the raw
# values array does not (it can yield NaN or fail on the None entry).
mostReachedCheckpointCount = checkpoints.max()
print('\nmostReachedCheckpointCount={0}'.format(mostReachedCheckpointCount))

checkpointstable = pd.DataFrame({"checkpoints": checkpoints.values})
genericTreatment(checkpointstable, "checkpoints reached",
                 "percentage of most reached checkpoint",
                 0, mostReachedCheckpointCount, False, True)

# Second view: the placeholder row is filled with the number of sessions.
# Work on a copy and assign through .loc so that checkpointstable is left
# untouched and no chained assignment is involved.
# NOTE(review): assumes 'Checkpoint00' sorts before every real section name,
# so that row 0 is the placeholder -- confirm.
fullCheckpointsTable = checkpointstable.copy()
fullCheckpointsTable.loc[0, 'checkpoints'] = checkpointsGameSessionCount
fullMostReachedCheckpointCount = fullCheckpointsTable['checkpoints'].max()
print('\nfullMostReachedCheckpointCount={0}'.format(fullMostReachedCheckpointCount))
print('\nfullCheckpointsTable=\n{0}'.format(fullCheckpointsTable))
genericTreatment(fullCheckpointsTable, "checkpoints reached",
                 "percentage of most reached checkpoint",
                 0, fullMostReachedCheckpointCount, False, True)
# Best checkpoint per session: for each playerId, the greatest section name
# found among its "reach" events, then the number of sessions per best section.
# Legacy "reached" rows are not merged in this cell.
checkpointsByGameSession = checkpointsDF[checkpointsDF["type"]=="reach"].loc[:,['section','playerId']]
grouped = checkpointsByGameSession.groupby("playerId")
# max() compares section names as plain values.
# NOTE(review): this assumes section names sort in progression order -- confirm.
maxCheckpointWithIDs = grouped.max()
maxCheckpointTable = pd.DataFrame({"maxCheckpoint" : maxCheckpointWithIDs.values.flatten()})

# Sessions per best section, with an empty 'Checkpoint00' placeholder entry,
# ordered by section name.
maxCheckpointCounts = maxCheckpointTable["maxCheckpoint"].value_counts()
maxCheckpointCounts['Checkpoint00'] = None
maxCheckpointCounts = maxCheckpointCounts.sort_index()
print('\nmaxCheckpointCounts=\n{0}'.format(maxCheckpointCounts))
maxCheckpointCountsTable = pd.DataFrame({"maxCheckpoint" : maxCheckpointCounts.values})
genericTreatment( maxCheckpointCountsTable, "best checkpoint reached", "game sessions", 0, reachedGameSessionCount, False, True )

# Second view: the placeholder holds the sessions that never reached any
# checkpoint. Work on a copy so that maxCheckpointCounts keeps the values
# that were printed above.
fullMaxCheckpointCounts = maxCheckpointCounts.copy()
fullMaxCheckpointCounts['Checkpoint00'] = neverReachedGameSessionCount
fullMaxCheckpointCountsTable = pd.DataFrame({"fullMaxCheckpoint" : fullMaxCheckpointCounts.values})
genericTreatment( fullMaxCheckpointCountsTable, "best checkpoint reached", "game sessions", 0, checkpointsGameSessionCount, False, True )
print('\nfullMaxCheckpointCountsTable=\n{0}'.format(fullMaxCheckpointCountsTable))

# Cell output: summary statistics of the full table.
fullMaxCheckpointCountsTable.describe()
Out[16]: