Preparation


In [ ]:
%run "../Functions/1. Game sessions.ipynb"

In [ ]:
import unidecode

Tests

Tinkering


In [ ]:
accented_string = "Enormément"
# accented_string is a Unicode string containing an accented 'é'

unaccented_string = unidecode.unidecode(accented_string)
unaccented_string
# unaccented_string is expected to be the ASCII transliteration 'Enormement'

getUserSessions tinkering


In [ ]:
# Inputs for the getUserSessions experiment below: the dataframe to query and a fixed user id.
# NOTE(review): rmdf1522 is not defined in this notebook — presumably provided by the %run'd notebook; confirm.
_rmDF = rmdf1522
userId = '8829514a-cb9f-47fb-aaeb-3167776f1062'
# Alternative: draw a random user instead of the hard-coded one.
#userId = getRandomRedMetricsGUID(_rmDF)

In [ ]:
# Inlined body of getUserSessions(_rmDF, userId): the distinct, non-null
# session ids recorded on the rows that belong to the given user.
#def getUserSessions( _rmDF, userId):
_isUserRow = _rmDF['userId'] == userId
result = _rmDF.loc[_isUserRow, 'sessionId'].drop_duplicates().dropna()
result

In [ ]:
# Pick one of the user's sessions uniformly at random.
# randint is imported here because the only other import of it in this notebook
# sits in a later cell, so this cell would otherwise depend on execution order.
from random import randint

if len(result) == 0:
    # randint(0, -1) would fail with an opaque "empty range" ValueError;
    # raise the same exception type with an actionable message instead.
    raise ValueError("no session found for userId " + str(userId))

# randint is inclusive on both bounds, hence the -1.
_sessionIndex = randint(0, len(result) - 1)
_guid = result.iloc[_sessionIndex]
_guid

In [ ]:
# Display the user id the session above was drawn for.
userId

getTranslatedForm tinkering - from 0.4 GF correct answers

questionsAnswersTranslationsFR.T

questionsAnswersTranslationsFR.loc["Are you interested in video games?"]

questionsAnswersTranslationsFR.loc["Do you play video games?"]

localizedFormFR = gformFR

returns an English-indexed, English-localized answer dataframe

from a French-indexed, French-localized dataframe

def getTranslatedForm( localizedFormFR ):

result = localizedFormFR.copy()

translate answers

for question in result.columns: for index in result.index: answer = result.loc[index, question] if (0 != len(questionsAnswersTranslationsFR.loc[question])): if (answer in questionsAnswersTranslationsFR.loc[question]): result.loc[index, question] =\ questionsAnswersTranslationsFR.loc[question][answer] else: print(question)

            #print(index)
            print(answer)
            print(questionsAnswersTranslationsFR.loc[question])
            print()
            print()
            print()

translate questions

result = result.rename(columns=dict(zip(localizedFormFR.columns,gformEN.columns)))

result.T

len(questionsAnswersTranslationsFR.loc[QTimestamp])

getTranslatedForm( gformFR, questionsAnswersTranslationsFR ).iloc[1]

getRandomRedMetricsGUID tinkering


In [ ]:
# Inlined body of getRandomRedMetricsGUID: draw a random user id that is in GUID format.
from random import randint

uniqueUsers = rmdf1522['userId'].dropna().unique()
userCount = len(uniqueUsers)

# Filter first instead of re-drawing until a GUID-formatted id comes up:
# the rejection loop never terminates when no id passes isGUIDFormat, and
# randint(0, -1) fails with an opaque error when there are no users at all.
validUsers = [uid for uid in uniqueUsers if isGUIDFormat(uid)]
if not validUsers:
    raise ValueError("no userId in GUID format among " + str(userCount) + " unique users")

# randint is inclusive on both bounds, hence the -1.
userIndex = randint(0, len(validUsers) - 1)
testlocalplayerguid = validUsers[userIndex]
testlocalplayerguid

In [ ]:
# Number of distinct, non-null session ids in the dataset.
_distinctSessionIds = rmdf1522["sessionId"].dropna().unique()
sessionscount = len(_distinctSessionIds)
sessionscount

In [ ]:
# List the distinct values found in the 'customData.platform' column (NaN included, if present).
platforms = rmdf1522["customData.platform"].unique()
platforms

print("part100="+str(part100.head(1))) print("part131="+str(part131.head(1))) print("part132="+str(part132.head(1))) print("part133="+str(part133.head(1))) print("part140="+str(part140.head(1))) print("part150="+str(part150.head(1))) print("part151="+str(part151.head(1))) print("part152="+str(part152.head(1))) print("df="+str(df.head(1)))


In [ ]:
# User id used by the cells below.
# NOTE(review): the literal embeds double quotes inside the string, unlike the
# unquoted ids used in other cells — presumably matches how userId is stored in
# some exports; confirm against the loaded data.
testGUID = '"4dbc2f43-421c-4e23-85d4-f17723ff8c66"'

In [ ]:
# Count the sessions of the test user.
# includewithoutusers=True will count sessions that do not have any userId attached
# NOTE(review): includewithoutusers is not passed in the call below, and the
# signature of getSessionsCount is not visible here — confirm the parameter still exists.
getSessionsCount( rmdf1522, testGUID)

print("part100="+str(part100.columns)) print("part131="+str(part131.columns)) print("part132="+str(part132.columns)) print("part133="+str(part133.columns)) print("part140="+str(part140.columns)) print("part150="+str(part150.columns)) print("part151="+str(part151.columns)) print("part152="+str(part152.columns))

print("dfconcat="+str(dfconcat.columns))

print("df="+str(df.columns))

df.columns


In [ ]:
# Sessions of the test user, as returned by the getUserSessions helper.
sessionsList = getUserSessions(rmdf1522, testGUID)
sessionsList

In [ ]:
# Manual alternative to getUserSessions: keep only the 'start' events, drop the
# now-constant 'type' column and every row with a missing value, then select
# the rows belonging to the test user.
sessionsList = rmdf1522[rmdf1522['type']=='start']
# The axis is named explicitly: the positional form drop('type', 1) was
# deprecated in pandas 1.3 and removed in pandas 2.0.
sessionsList = sessionsList.drop(columns='type')
sessionsList = sessionsList.dropna(how='any')
userSessionsList = sessionsList[sessionsList['userId']==testGUID]
userSessionsList

In [ ]:
# Number of sessions found for the test user (length along the first axis of the helper's result).
#print(testGUID)
sessionsList = getUserSessions(rmdf1522, testGUID)
# Alternative kept for reference: same lookup with the embedded double quotes stripped from the id.
#sessionsList = getAllSessions(rmdf1522, testGUID.replace('"',''))
#print(type(sessionsList))
sessionsList.shape[0]

In [ ]:
# One row per distinct (userId, sessionId) pair.
_userSessionColumns = ['userId', 'sessionId']
allSessions = rmdf1522[_userSessionColumns].drop_duplicates()
allSessions.head()

In [ ]:
# Ten users with the most distinct sessions, most active first.
(
    allSessions
    .groupby('userId')
    .size()
    .reset_index(name='counts')
    .sort_values(by='counts', ascending=False)
    .head(10)
)

In [ ]:
#getUserSessionsCounts(getNormalizedRedMetricsCSV(part152)).head(10)

In [ ]:
# Same per-user session count via agg(['count']); only the first rows are shown, unsorted.
allSessions.groupby('userId').agg(['count']).head() #.sort_values(by='sessionId', ascending=False).head(10)

In [ ]:
#df2 = pd.concat([df151, rmdf1522])
#df2.head(2)
#print(df2.columns)
#df2columns = df2.columns.values
#type(df2columns)
#df2columns
#newColumns = np.concatenate((minimalInitializationColumns, df2columns))
#newColumns
#df2 = getNormalizedRedMetricsCSV(df)

getRandomSessionGUID tinkering


In [ ]:
# Call the helper with no argument.
# NOTE(review): presumably falls back to a random user, as the inlined body further down does — confirm.
getRandomSessionGUID()

In [ ]:
# Compare the container types returned by .unique() and by getUserSessions.
# The id literal embeds double quotes inside the string.
_userId = '"e8fed737-7c65-49c8-bf84-f8ae71c094f8"'
type(rmdf1522['userId'].dropna().unique()), type(getUserSessions( rmdf1522, _userId ))

In [ ]:
# Sessions of one user, looked up with the unquoted form of the id.
_userId = 'e8fed737-7c65-49c8-bf84-f8ae71c094f8'
_uniqueSessions = getUserSessions( rmdf1522, _userId )
# A bare expression in the middle of a cell is evaluated but never shown;
# print the count explicitly so both the count and the sessions are visible.
print(len(_uniqueSessions))
_uniqueSessions

In [ ]:
# Inlined body of getRandomSessionGUID(_userId=''): return a random session id
# of the given user, or of a random user when the id is not GUID-formatted.
#_userId = ''
_userId = '"e8fed737-7c65-49c8-bf84-f8ae71c094f8"'

#def getRandomSessionGUID( _userId = '' ):
rmId = _userId
if not isGUIDFormat(_userId):
    # Unusable id (e.g. the empty default): fall back to a random user.
    rmId = getRandomRedMetricsGUID()

_uniqueSessions = getUserSessions( rmdf1522, rmId )

_sessionsCount = len(_uniqueSessions)
if _sessionsCount == 0:
    # randint(0, -1) would fail with an opaque "empty range" ValueError;
    # raise the same exception type with an actionable message instead.
    raise ValueError("no session found for userId " + str(rmId))

# randint is inclusive on both bounds, hence the -1.
_sessionIndex = randint(0, _sessionsCount - 1)
_guid = _uniqueSessions.iloc[_sessionIndex]
_guid

In [ ]:
# Display the user id actually used for the session lookup above.
rmId

In [ ]:
# Display the randomly drawn position within the user's sessions.
_sessionIndex

In [ ]:
# Display how many sessions the draw was made from.
_sessionsCount

In [ ]:
# Sanity check: randint is inclusive on both bounds, so randint(0, 0) always
# returns 0 — the draw above still works for a user with a single session.
randint(0,0)

In [ ]:
# Display the sessions the random draw was made from.
_uniqueSessions

In [ ]:
# Call the real helper again, for comparison with the inlined version above.
getRandomSessionGUID()

getFirstEventDate tinkering


In [ ]:
# Inlined body of getFirstEventDate: earliest 'userTime' among the events that
# carry a 'section' value, across all sessions of one user.
# The fixed test user is kept as a commented alternative; the assignment was
# dead code because it was immediately overwritten by the random draw.
#userId = testGUID
userId = getRandomRedMetricsGUID()

#print('----------------------uid='+str(uid)+'----------------------')
sessions = getUserSessions(rmdf1522, userId)

# Sentinel far in the future: any real event time compares as earlier.
# It is printed unchanged when the user has no event with a section.
firstGameTime = pd.to_datetime('2050-12-31T12:59:59.000Z', utc=True)

for session in sessions:
    #print('-----------------------------------------session='+str(session))
    timedEvents = rmdf1522[rmdf1522['sessionId']==session]
    timedEvents = timedEvents.dropna(subset=['section'])

    if len(timedEvents) > 0:
        # Parse into a separate Series rather than writing a column back into
        # a filtered slice of rmdf1522, which can raise SettingWithCopyWarning.
        # Parsing stays element-wise so heterogeneous timestamp formats are
        # handled exactly as before.
        userTimes = timedEvents['userTime'].map(lambda t: pd.to_datetime(t, utc=True))

        earliest = userTimes.min()
        if earliest < firstGameTime:
            firstGameTime = earliest
    #else:
        #print('no event with section')
#print('-----------------------------------------')
print("firstGameTime=" + str(firstGameTime))

In [ ]:
# All rows recorded for the user drawn above.
rmdf1522[rmdf1522['userId']==userId]

In [ ]:
# Sessions of the same user, for cross-checking against the rows above.
sessions = getUserSessions(rmdf1522, userId)
sessions