In [ ]:
%run "../Functions/1. Google form analysis.ipynb"
In [ ]:
# Localplayerguids of users who answered the questionnaire (see below).
# French
#localplayerguid = 'a4d4b030-9117-4331-ba48-90dc05a7e65a'
#localplayerguid = 'd6826fd9-a6fc-4046-b974-68e50576183f'
#localplayerguid = 'deb089c0-9be3-4b75-9b27-28963c77b10c'
#localplayerguid = '75e264d6-af94-4975-bb18-50cac09894c4'
#localplayerguid = '3d733347-0313-441a-b77c-3e4046042a53'
# English
localplayerguid = '8d352896-a3f1-471c-8439-0f426df901c1'
#localplayerguid = '7037c5b2-c286-498e-9784-9a061c778609'
#localplayerguid = '5c4939b5-425b-4d19-b5d2-0384a515539e'
#localplayerguid = '7825d421-d668-4481-898a-46b51efe40f0'
#localplayerguid = 'acb9c989-b4a6-4c4d-81cc-6b5783ec71d8'
#localplayerguid = devPCID5
In [ ]:
len(getAllResponders())
In [ ]:
assert(not hasAnswered( userIDThatDidNotAnswer )), "User has NOT answered"
In [ ]:
assert(hasAnswered( userID1AnswerEN )), "User HAS answered"
In [ ]:
assert(hasAnswered( userIDAnswersEN )), "User HAS answered"
In [ ]:
assert(hasAnswered( userID1AnswerFR )), "User HAS answered"
In [ ]:
assert(hasAnswered( userIDAnswersFR )), "User HAS answered"
In [ ]:
assert(hasAnswered( userIDAnswersENFR )), "User HAS answered"
In [ ]:
assert (len(getAnswers( userIDThatDidNotAnswer ).columns) == 0),"Too many answers"
In [ ]:
assert (len(getAnswers( userID1AnswerEN ).columns) == 1),"Too many answers"
In [ ]:
assert (len(getAnswers( userIDAnswersEN ).columns) >= 2),"Not enough answers"
In [ ]:
assert (len(getAnswers( userID1AnswerFR ).columns) == 1),"Not enough columns"
In [ ]:
assert (len(getAnswers( userIDAnswersFR ).columns) >= 2),"Not enough answers"
In [ ]:
assert (len(getAnswers( userIDAnswersENFR ).columns) >= 2),"Not enough answers"
In [ ]:
assert (len(getCorrections( userIDThatDidNotAnswer ).columns) == 0),"Too many answers"
In [ ]:
assert (len(getCorrections( userID1AnswerEN ).columns) == 2),"Too many answers"
In [ ]:
assert (len(getCorrections( userIDAnswersEN ).columns) >= 4),"Not enough answers"
In [ ]:
assert (len(getCorrections( userID1AnswerFR ).columns) == 2),"Too many answers"
In [ ]:
assert (len(getCorrections( userIDAnswersFR ).columns) >= 4),"Not enough answers"
In [ ]:
assert (len(getCorrections( userIDAnswersENFR ).columns) >= 4),"Not enough answers"
In [ ]:
In [ ]:
assert (len(pd.DataFrame(getScore( userIDThatDidNotAnswer ).values.flatten().tolist()).values.flatten().tolist()) == 0),"Too many answers"
In [ ]:
score = getScore( userID1AnswerEN )
# The score frame must hold three cells in total, and the first entry of the
# 'before' cell must be 23.
# .iloc[0] picks the first row by position explicitly: a bare [0] on a Series
# whose index is not integer-based relies on the deprecated positional
# fallback of Series.__getitem__.
assert (
    len(score.values.flatten()) == 3
    and score['before'].iloc[0][0] == 23
), "Incorrect score"
In [ ]:
score = getScore( userIDAnswersEN )
# The score frame must hold three cells in total; the first 'before' entry
# must be 5 and the first 'after' entry must be 25.
# .iloc[0] picks the first row by position explicitly: a bare [0] on a Series
# whose index is not integer-based relies on the deprecated positional
# fallback of Series.__getitem__.
assert (
    len(score.values.flatten()) == 3
    and score['before'].iloc[0][0] == 5
    and score['after'].iloc[0][0] == 25
), "Incorrect score"
In [ ]:
score = getScore( userID1AnswerFR )
# The score frame must hold three cells in total, and the first entry of the
# 'before' cell must be 23.
# .iloc[0] picks the first row by position explicitly: a bare [0] on a Series
# whose index is not integer-based relies on the deprecated positional
# fallback of Series.__getitem__.
assert (
    len(score.values.flatten()) == 3
    and score['before'].iloc[0][0] == 23
), "Incorrect score"
In [ ]:
score = getScore( userIDAnswersFR )
# The score frame must hold three cells in total; the first 'before' entry
# must be 15 and the first 'after' entry must be 26.
# .iloc[0] picks the first row by position explicitly: a bare [0] on a Series
# whose index is not integer-based relies on the deprecated positional
# fallback of Series.__getitem__.
assert (
    len(score.values.flatten()) == 3
    and score['before'].iloc[0][0] == 15
    and score['after'].iloc[0][0] == 26
), "Incorrect score"
In [ ]:
score = getScore( userIDAnswersENFR )
# The score frame must hold three cells in total; the first 'before' entry
# must be 4 and the first 'after' entry must be 13.
# .iloc[0] picks the first row by position explicitly: a bare [0] on a Series
# whose index is not integer-based relies on the deprecated positional
# fallback of Series.__getitem__.
assert (
    len(score.values.flatten()) == 3
    and score['before'].iloc[0][0] == 4
    and score['after'].iloc[0][0] == 13
), "Incorrect score"
In [ ]:
objective = 0
assert (len(getValidatedCheckpoints( userIDThatDidNotAnswer )) == objective),"Incorrect number of answers"
In [ ]:
objective = 1
assert (len(getValidatedCheckpoints( userID1AnswerEN )) == objective),"Incorrect number of answers"
In [ ]:
assert (getValidatedCheckpoints( userID1AnswerEN )[0].equals(validableCheckpoints)) \
, "User has validated everything"
In [ ]:
objective = 2
assert (len(getValidatedCheckpoints( userIDAnswersEN )) == objective),"Incorrect number of answers"
In [ ]:
objective = 3
# The first questionnaire of this user must yield `objective` validated checkpoints.
# str() is required in the message: concatenating the int directly would raise
# TypeError (instead of AssertionError) whenever the assertion fails.
assert (len(getValidatedCheckpoints( userIDAnswersEN )[0]) == objective) \
    , "User has validated " + str(objective) + " chapters on first try"
In [ ]:
objective = 1
assert (len(getValidatedCheckpoints( userID1AnswerFR )) == objective),"Incorrect number of answers"
In [ ]:
assert (getValidatedCheckpoints( userID1AnswerFR )[0].equals(validableCheckpoints)) \
, "User has validated everything"
In [ ]:
objective = 2
assert (len(getValidatedCheckpoints( userIDAnswersFR )) == objective),"Incorrect number of answers"
In [ ]:
objective = 5
# The second questionnaire of this user must yield `objective` validated checkpoints.
# str() is required in the message: concatenating the int directly would raise
# TypeError (instead of AssertionError) whenever the assertion fails.
assert (len(getValidatedCheckpoints( userIDAnswersFR )[1]) == objective) \
    , "User has validated " + str(objective) + " chapters on second try"
In [ ]:
objective = 2
assert (len(getValidatedCheckpoints( userIDAnswersENFR )) == objective),"Incorrect number of answers"
In [ ]:
objective = 5
# The second questionnaire of this user must yield `objective` validated checkpoints.
# str() is required in the message: concatenating the int directly would raise
# TypeError (instead of AssertionError) whenever the assertion fails.
assert (len(getValidatedCheckpoints( userIDAnswersENFR )[1]) == objective) \
    , "User has validated " + str(objective) + " chapters on second try"
In [ ]:
getValidatedCheckpoints( userIDThatDidNotAnswer )
In [ ]:
pd.Series(getValidatedCheckpoints( userIDThatDidNotAnswer ))
In [ ]:
type(getNonValidated(pd.Series(getValidatedCheckpoints( userIDThatDidNotAnswer ))))
In [ ]:
validableCheckpoints
In [ ]:
assert(getNonValidated(getValidatedCheckpoints( userIDThatDidNotAnswer ))).equals(validableCheckpoints), \
"incorrect validated checkpoints: should contain all checkpoints that can be validated"
In [ ]:
testSeries = pd.Series(
[
'', # 7
'', # 8
'', # 9
'', # 10
'tutorial1.Checkpoint00', # 11
'tutorial1.Checkpoint00', # 12
'tutorial1.Checkpoint00', # 13
'tutorial1.Checkpoint00', # 14
'tutorial1.Checkpoint02', # 15
'tutorial1.Checkpoint01', # 16
'tutorial1.Checkpoint05'
]
)
assert(getNonValidated(pd.Series([testSeries]))[0][0] == 'tutorial1.Checkpoint13'), "Incorrect non validated checkpoint"
In [ ]:
getNonValidatedCheckpoints( userIDThatDidNotAnswer )
In [ ]:
getNonValidatedCheckpoints( userID1AnswerEN )
In [ ]:
getNonValidatedCheckpoints( userIDAnswersEN )
In [ ]:
getNonValidatedCheckpoints( userID1AnswerFR )
In [ ]:
getNonValidatedCheckpoints( userIDAnswersFR )
In [ ]:
getNonValidatedCheckpoints( userIDAnswersENFR )
In [ ]:
getValidatedCheckpointsCounts(userIDThatDidNotAnswer)
getValidatedCheckpointsCounts(userID1AnswerEN)
getValidatedCheckpointsCounts(userIDAnswersEN)
getValidatedCheckpointsCounts(userID1ScoreEN)
getValidatedCheckpointsCounts(userIDScoresEN)
getValidatedCheckpointsCounts(userID1AnswerFR)
getValidatedCheckpointsCounts(userIDAnswersFR)
getValidatedCheckpointsCounts(userID1ScoreFR)
getValidatedCheckpointsCounts(userIDScoresFR)
getValidatedCheckpointsCounts(userIDAnswersENFR)
In [ ]:
getNonValidatedCheckpointsCounts(userIDThatDidNotAnswer)
getNonValidatedCheckpointsCounts(userID1AnswerEN)
getNonValidatedCheckpointsCounts(userIDAnswersEN)
getNonValidatedCheckpointsCounts(userID1ScoreEN)
getNonValidatedCheckpointsCounts(userIDScoresEN)
getNonValidatedCheckpointsCounts(userID1AnswerFR)
getNonValidatedCheckpointsCounts(userIDAnswersFR)
getNonValidatedCheckpointsCounts(userID1ScoreFR)
getNonValidatedCheckpointsCounts(userIDScoresFR)
getNonValidatedCheckpointsCounts(userIDAnswersENFR)
In [ ]:
# Answer groups, each listing the English and French wordings together.
aYes = ["Yes", "Oui"]
aNo = ["No", "Non"]
aNoIDK = ["No", "Non", "I don't know", "Je ne sais pas"]

# How long have you studied biology?
qBiologyEducationLevelIndex = 5
aBiologyEducationLevelHigh = ["Until bachelor's degree", "Jusqu'à la license"]
# One answer per line with a trailing comma: a missing comma between two
# adjacent string literals silently concatenates them into a single answer.
aBiologyEducationLevelLow = [
    'Until the end of high school',
    'Until the end of middle school',
    'Not even in middle school',
    "Jusqu'au bac",
    "Jusqu'au brevet",
    'Jamais',
]

# Have you ever heard about BioBricks?
qHeardBioBricksIndex = 8

# Have you played the current version of Hero.Coli?
qPlayedHerocoliIndex = 10
qPlayedHerocoliYes = [
    'Yes',
    'Once',
    'Multiple times',
    'Oui',
    'De nombreuses fois',
    'Quelques fois',
    'Une fois',
]
qPlayedHerocoliNo = ['No', 'Non']
In [ ]:
gform['How long have you studied biology?'].unique()
In [ ]:
gform['Before playing Hero.Coli, had you ever heard about BioBricks?'].unique()
In [ ]:
gform['Have you played the current version of Hero.Coli?'].unique()
In [ ]:
getAllAnswerRows(qBiologyEducationLevelIndex, aBiologyEducationLevelHigh)
In [ ]:
assert(len(getAllAnswerRows(qBiologyEducationLevelIndex, aBiologyEducationLevelHigh)) != 0)
In [ ]:
assert(len(getAllAnswerRows(qBiologyEducationLevelIndex, aBiologyEducationLevelLow)) != 0)
In [ ]:
assert(len(getAllAnswerRows(qHeardBioBricksIndex, aYes)) != 0)
In [ ]:
assert(len(getAllAnswerRows(qHeardBioBricksIndex, aNoIDK)) != 0)
In [ ]:
assert(len(getAllAnswerRows(qPlayedHerocoliIndex, qPlayedHerocoliYes)) != 0)
In [ ]:
assert(len(getAllAnswerRows(qPlayedHerocoliIndex, qPlayedHerocoliNo)) != 0)
In [ ]:
questionIndex = 15
gform.iloc[:, questionIndex].head()
In [ ]:
(qBiologyEducationLevelIndex, aBiologyEducationLevelHigh)
In [ ]:
getAllAnswerRows(qBiologyEducationLevelIndex, aBiologyEducationLevelHigh)
In [ ]:
getPercentCorrectKnowingAnswer(qBiologyEducationLevelIndex, aBiologyEducationLevelHigh)
In [ ]:
getPercentCorrectKnowingAnswer(qBiologyEducationLevelIndex, aBiologyEducationLevelLow)
In [ ]:
getPercentCorrectKnowingAnswer(qHeardBioBricksIndex, aYes)
In [ ]:
getPercentCorrectKnowingAnswer(qHeardBioBricksIndex, aNoIDK)
In [ ]:
playedHerocoliIndexYes = getPercentCorrectKnowingAnswer(qPlayedHerocoliIndex, qPlayedHerocoliYes)
playedHerocoliIndexYes
In [ ]:
playedHerocoliIndexNo = getPercentCorrectKnowingAnswer(qPlayedHerocoliIndex, qPlayedHerocoliNo)
playedHerocoliIndexNo
In [ ]:
playedHerocoliIndexYes - playedHerocoliIndexNo
In [ ]:
(playedHerocoliIndexYes - playedHerocoliIndexNo) / (1 - playedHerocoliIndexNo)
In [ ]:
#gform = gformEN
In [ ]:
transposed = gform.T
#answers = transposed[transposed[]]
transposed
In [ ]:
type(gform)
In [ ]:
gform.columns
In [ ]:
gform.columns.get_loc('Do not edit - pre-filled anonymous ID')
In [ ]:
localplayerguidkey
In [ ]:
# Using the whole question:
gform[localplayerguidkey]
In [ ]:
# Get index from question
localplayerguidindex
In [ ]:
# Using the index of the question:
gform.iloc[:, localplayerguidindex]
userIDThatDidNotAnswer
userID1AnswerEN
userIDAnswersEN
userID1AnswerFR
userIDAnswersFR
userIDAnswersENFR
In [ ]:
sample = gform
#def getUniqueUserCount(sample):
sample[localplayerguidkey].nunique()
In [ ]:
userIds = gform[localplayerguidkey].unique()
len(userIds)
In [ ]:
# Print the number of responders, the number of distinct responders, and then
# every distinct id that isGUIDFormat rejects.
allResponders = getAllResponders()
uniqueUsers = np.unique(allResponders)
print(len(allResponders))
print(len(uniqueUsers))
for guid in uniqueUsers:
    if isGUIDFormat(guid):
        continue
    print('incorrect guid: ' + str(guid))
In [ ]:
uniqueUsers = getAllResponders()
userCount = len(uniqueUsers)
guid = '0'
while (not isGUIDFormat(guid)):
userIndex = randint(0,userCount-1)
guid = uniqueUsers[userIndex]
guid
In [ ]:
# Draft of getAnswers( userId, _form = gform ).
# Alternative test users: userIDThatDidNotAnswer, userID1AnswerEN
userId = userIDAnswersEN
_form = gform

# Rows of the form filled in by this user, then transposed so that each
# questionnaire becomes one column.
answers = _form[_form[localplayerguidkey]==userId]
_columnAnswers = answers.T
if len(answers) == 0:
    # user has never answered
    print("user " + str(userId) + " has never answered")
else:
    # Prefix every column label with the answers stem.
    _newColumns = [answersColumnNameStem + str(column) for column in _columnAnswers.columns]
    _columnAnswers.columns = _newColumns
_columnAnswers
In [ ]:
answers
In [ ]:
# Selection of a specific answer
answers.iloc[:,localplayerguidindex]
In [ ]:
answers.iloc[:,localplayerguidindex].iloc[0]
In [ ]:
type(answers.iloc[0,:])
In [ ]:
answers.iloc[0,:].values
In [ ]:
#### Question that has a correct answer:
In [ ]:
questionIndex = 15
In [ ]:
answers.iloc[:,questionIndex].iloc[0]
In [ ]:
correctAnswers.iloc[questionIndex][0]
In [ ]:
answers.iloc[:,questionIndex].iloc[0].startswith(correctAnswers.iloc[questionIndex][0])
In [ ]:
#### Question that has no correct answer:
In [ ]:
questionIndex = 0
#answers.iloc[:,questionIndex].iloc[0].startswith(correctAnswers.iloc[questionIndex].iloc[0])
In [ ]:
#### Batch check:
In [ ]:
columnAnswers = getAnswers( userId )
In [ ]:
columnAnswers.values[2,0]
In [ ]:
columnAnswers[columnAnswers.columns[0]][2]
In [ ]:
correctAnswers
In [ ]:
type(columnAnswers)
In [ ]:
indexOfFirstEvaluationQuestion = 13
columnAnswers.index[indexOfFirstEvaluationQuestion]
In [ ]:
gform.tail(50)
In [ ]:
gform[gform[localplayerguidkey] == 'ba202bbc-af77-42e8-85ff-e25b871717d5']
In [ ]:
gformRealBefore = gform.loc[88, 'Timestamp']
gformRealBefore
In [ ]:
gformRealAfter = gform.loc[107, 'Timestamp']
gformRealAfter
In [ ]:
RMRealFirstEvent = getFirstEventDate(gform.loc[88,localplayerguidkey])
RMRealFirstEvent
In [ ]:
# Draft of getTemporality( answerDate, gameEventDate ).
tzAnswerDate = gformRealBefore
gameEventDate = RMRealFirstEvent

# Start from the third temporality; it is kept when the game event date equals
# the pd.Timestamp.max (UTC) value, or when neither comparison below holds.
result = answerTemporalities[2]
if gameEventDate != pd.Timestamp.max.tz_localize('utc'):
    # Compare the date assigned above: this cell never defines `answerDate`.
    if tzAnswerDate <= gameEventDate:
        result = answerTemporalities[0]
    elif tzAnswerDate > gameEventDate:
        result = answerTemporalities[1]
result, tzAnswerDate, gameEventDate
In [ ]:
In [ ]:
firstEventDate = getFirstEventDate(gform.loc[userIndex,localplayerguidkey])
firstEventDate
In [ ]:
gformTestBefore = pd.Timestamp('2018-01-16 14:28:20.998000+0000', tz='UTC')
getTemporality(gformTestBefore,firstEventDate)
In [ ]:
gformTestWhile = pd.Timestamp('2018-01-16 14:28:23.998000+0000', tz='UTC')
getTemporality(gformTestWhile,firstEventDate)
In [ ]:
gformTestAfter = pd.Timestamp('2018-01-16 14:28:24.998000+0000', tz='UTC')
getTemporality(gformTestAfter,firstEventDate)
In [ ]:
_form = gform
_rmDF = rmdf152
_rmTestDF = normalizedRMDFTest
includeAndroid = True
#def getTestAnswers( _form = gform, _rmDF = rmdf152, _rmTestDF = normalizedRMDFTest, includeAndroid = True):
_form[_form[localplayerguidkey].isin(testUsers)]
In [ ]:
_form[localplayerguidkey]
In [ ]:
testUsers
In [ ]:
len(getTestAnswers()[localplayerguidkey])
In [ ]:
rmdf152['customData.platform'].unique()
In [ ]:
rmdf152[rmdf152['customData.platform'].apply(lambda s: str(s).endswith('editor'))]
In [ ]:
rmdf152[rmdf152['userId'].isin(getTestAnswers()[localplayerguidkey])][['userTime','customData.platform','userId']].dropna()
In [ ]:
columnAnswers
In [ ]:
#testUserId = userID1AnswerEN
testUserId = '8d352896-a3f1-471c-8439-0f426df901c1'
In [ ]:
getCorrections(testUserId)
In [ ]:
# Draft of getCorrections( _userId, _source = correctAnswers, _form = gform ).
# For every answers column, add a corrections column holding True/False for the
# questions that have at least one accepted answer in `source`, NaN otherwise.
testUserId = '8d352896-a3f1-471c-8439-0f426df901c1'
source = correctAnswers

columnAnswers = getAnswers( testUserId )
if 0 != len(columnAnswers.columns):
    questionsCount = len(columnAnswers.values)
    for columnName in columnAnswers.columns:
        if answersColumnNameStem in columnName:
            answerNumber = columnName.replace(answersColumnNameStem,"")
            newCorrectionsColumnName = correctionsColumnNameStem + answerNumber
            # Object-typed NaN column assigned by position: booleans can then be
            # stored without converting a float column, and no index alignment
            # is involved.
            columnAnswers[newCorrectionsColumnName] = np.full(questionsCount, np.nan, dtype=object)
            for question in columnAnswers[columnName].index:
                __correctAnswers = source.loc[question]
                if len(__correctAnswers) > 0:
                    # An answer is correct when it starts with any accepted answer.
                    _givenAnswer = str(columnAnswers.loc[question,columnName])
                    columnAnswers.loc[question,newCorrectionsColumnName] = any(
                        _givenAnswer.startswith(str(correctAnswer))
                        for correctAnswer in __correctAnswers
                    )
else:
    # user has never answered
    print("can't give correct answers")
columnAnswers
In [ ]:
question = 'How old are you?'
columnName = ''
for column in columnAnswers.columns:
if str.startswith(column, 'answers'):
columnName = column
break
In [ ]:
type(columnAnswers.loc[question,columnName])
In [ ]:
getCorrections(localplayerguid)
In [ ]:
gform.columns[20]
In [ ]:
columnAnswers.loc[gform.columns[20],columnAnswers.columns[1]]
In [ ]:
columnAnswers[columnAnswers.columns[1]][gform.columns[13]]
In [ ]:
columnAnswers.loc[gform.columns[13],columnAnswers.columns[1]]
In [ ]:
columnAnswers.iloc[20,1]
In [ ]:
questionsCount
In [ ]:
np.full(3, np.nan)
In [ ]:
pd.Series(np.full(questionsCount, np.nan))
In [ ]:
columnAnswers.loc[question,newCorrectionsColumnName]
In [ ]:
question
In [ ]:
correctAnswers[question]
In [ ]:
getCorrections('8d352896-a3f1-471c-8439-0f426df901c1')
In [ ]:
correctAnswersEN
#demographicAnswersEN
type([])
In [ ]:
mergedCorrectAnswersEN = correctAnswersEN.copy()
for index in mergedCorrectAnswersEN.index:
#print(str(mergedCorrectAnswersEN.loc[index,column]))
mergedCorrectAnswersEN.loc[index] =\
demographicAnswersEN.loc[index] + mergedCorrectAnswersEN.loc[index]
mergedCorrectAnswersEN
In [ ]:
correctAnswersEN + demographicAnswersEN
In [ ]:
correctAnswers + demographicAnswers
In [ ]:
corrections = getCorrections(userIDAnswersENFR)
#corrections
In [ ]:
# Replace True by 1 and False by 0 in every corrections column, cell by cell;
# any other value is left untouched.
for columnName in corrections.columns:
    if correctionsColumnNameStem not in columnName:
        continue
    for index in corrections[columnName].index:
        _cellValue = corrections.loc[index,columnName]
        if True == _cellValue:
            corrections.loc[index,columnName] = 1
        elif False == _cellValue:
            corrections.loc[index,columnName] = 0
corrections
In [ ]:
binarized = getBinarizedCorrections(corrections)
binarized
In [ ]:
slicedBinarized = binarized[13:40]
slicedBinarized
In [ ]:
slicedBinarized =\
binarized[13:40][binarized.columns[\
binarized.columns.to_series().str.contains(correctionsColumnNameStem)\
]]
slicedBinarized
In [ ]:
_source = correctAnswers
_userId = getRandomGFormGUID()
getCorrections(_userId, _source=_source, _form = gform)
In [ ]:
_userId = '5e978fb3-316a-42ba-bb58-00856353838d'
gform[gform[localplayerguidkey] == _userId].iloc[0].index
In [ ]:
_gformLine = gform[gform[localplayerguidkey] == _userId].iloc[0]
_gformLine.loc['Before playing Hero.Coli, had you ever heard about synthetic biology?']
In [ ]:
# Draft of getBinarized(_gformLine, _source = correctAnswers), for one user only.
_gformLine = gform[gform[localplayerguidkey] == _userId].iloc[0]

# Questions that have at least one accepted answer in _source.
_notEmptyIndexes = [_index for _index in _source.index if len(_source.loc[_index]) > 0]

# NaN everywhere, then 0/1 for each question that has accepted answers.
_binarized = pd.Series(np.full(len(_gformLine.index), np.nan), index = _gformLine.index)
for question in _gformLine.index:
    _correctAnswers = _source.loc[question]
    if len(_correctAnswers) == 0:
        continue
    _givenAnswer = str(_gformLine.loc[question])
    _isCorrect = any(
        _givenAnswer.startswith(str(_correctAnswer))
        for _correctAnswer in _correctAnswers
    )
    _binarized.loc[question] = 1 if _isCorrect else 0

_slicedBinarized = _binarized.loc[_notEmptyIndexes]
_slicedBinarized
In [ ]:
_slicedBinarized.loc['What are BioBricks and devices?']
In [ ]:
allBinarized = getAllBinarized()
In [ ]:
plotCorrelationMatrix(allBinarized)
In [ ]:
source
In [ ]:
# Number of entries of the merged source that have no accepted answer at all.
source = correctAnswers + demographicAnswers
notEmptyIndexes = [eltIndex for eltIndex in source.index if len(source.loc[eltIndex]) > 0]
len(source)-len(notEmptyIndexes)
In [ ]:
emptyForm = gform[gform[localplayerguidkey] == 'incorrectGUID']
In [ ]:
emptyForm
In [ ]:
# Draft of getAllBinarized(_source = correctAnswers, _form = gform ).
_source = correctAnswers + demographicAnswers
_form = gform #emptyForm

# Questions that have at least one accepted answer in _source.
_notEmptyIndexes = []
for _index in _source.index:
    if(len(_source.loc[_index]) > 0):
        _notEmptyIndexes.append(_index)

# Collect one frame per responder and concatenate once at the end:
# calling pd.concat inside the loop copies the growing result every iteration.
# The leading empty frame keeps the question index when there is no responder.
_frames = [pd.DataFrame(index = _notEmptyIndexes)]
for _userId in getAllResponders( _form = _form ):
    _corrections = getCorrections(_userId, _source=_source, _form = _form)
    _binarized = getBinarizedCorrections(_corrections)
    # Keep only the corrections columns, restricted to the non-empty questions.
    _correctionsColumns = _binarized.columns[
        _binarized.columns.to_series().str.contains(correctionsColumnNameStem)
    ]
    _slicedBinarized = _binarized.loc[_notEmptyIndexes][_correctionsColumns]
    _frames.append(_slicedBinarized)
_result = pd.concat(_frames, axis=1)
# One row per corrected questionnaire, one column per question.
_result = _result.T
#_result
In [ ]:
if(_result.shape[0] > 0 and _result.shape[1] > 0):
correlation = _result.astype(float).corr()
#plt.matshow(correlation)
sns.clustermap(correlation,cmap=plt.cm.jet,square=True,figsize=(10,10))
In [ ]:
#ax = sns.clustermap(correlation,cmap=plt.cm.jet,square=True,figsize=(10,10),cbar_kws={\
#"orientation":"vertical"})
In [ ]:
# Compute the correlation of the transposed result with the first three entries
# of `methods`, print whether the matrices are identical, then list the
# differences between the first two that exceed 0.1.
_transposedFloatResult = _result.T.astype(float)
correlation_pearson = _transposedFloatResult.corr(methods[0])
correlation_kendall = _transposedFloatResult.corr(methods[1])
correlation_spearman = _transposedFloatResult.corr(methods[2])
print(correlation_pearson.equals(correlation_kendall))
print(correlation_kendall.equals(correlation_spearman))
diff = (correlation_pearson - correlation_kendall)
# Masked-out cells become NaN; drop them after flattening.
flattened = diff[diff > 0.1].values.flatten()
flattened[~np.isnan(flattened)]
In [ ]:
correlation
In [ ]:
# Labels of the 27 scientific questions, in questionnaire order.
# Both label lists are derived from this single list so that they cannot drift
# apart (a hand-typed copy had the '#23' marker inserted in the middle of a word).
scientificQuestionsBase = [
    'In order to modify the abilities of the bacterium, you have to...',
    'What are BioBricks and devices?',
    'What is the name of this BioBrick?',
    'What is the name of this BioBrick?.1',
    'What is the name of this BioBrick?.2',
    'What is the name of this BioBrick?.3',
    'What does this BioBrick do?',
    'What does this BioBrick do?.1',
    'What does this BioBrick do?.2',
    'What does this BioBrick do?.3',
    'Pick the case where the BioBricks are well-ordered:',
    'When does green fluorescence happen?',
    'What happens when you unequip the movement device?',
    'What is this?',
    'What does this device do?',
    'What does this device do?.1',
    'What does this device do?.2',
    'What does this device do?.3',
    'What does this device do?.4',
    'What does this device do?.5',
    'What does this device do?.6',
    'What does this device do?.7',
    'Guess: what would a device producing l-arabinose do, if it started with a l-arabinose-induced promoter?',
    'Guess: the bacterium would glow yellow...',
    'What is the species of the bacterium of the game?',
    'What is the scientific name of the tails of the bacterium?',
    'Find the antibiotic:',
]

# '<question> #<n>': question number appended.
scientificQuestionsLabels = [
    question + ' #' + str(number)
    for number, question in enumerate(scientificQuestionsBase, start=1)
]

# '#<n> <question>': question number prepended.
scientificQuestionsLabelsX = [
    '#' + str(number) + ' ' + question
    for number, question in enumerate(scientificQuestionsBase, start=1)
]
In [ ]:
# Plot the correlation matrix with one labelled tick per question.
questionsLabels = scientificQuestionsLabels
questionsLabelsX = scientificQuestionsLabelsX
fig = plt.figure(figsize=(10,10))
ax = fig.add_subplot(111)
ax.matshow(correlation)
# Fix the tick positions first, then attach the labels: tick labels are only
# reliable once the tick locations are fixed. One tick per label, starting at
# 0, so no empty padding label is needed.
_tickPositions = np.arange(len(questionsLabels))
ax.set_xticks(_tickPositions)
ax.set_yticks(_tickPositions)
ax.set_xticklabels(questionsLabelsX, rotation='vertical')
ax.set_yticklabels(questionsLabels);
In [ ]:
# Copy of the correlation matrix whose row and column labels carry the
# 1-based position of each question as a ' #<n>' suffix.
questionsLabels = correlation.columns.copy()
newLabels = [
    label + ' #' + str(position + 1)
    for position, label in enumerate(questionsLabels)
]
correlationRenamed = correlation.copy()
correlationRenamed.columns = newLabels
correlationRenamed.index = newLabels
correlationRenamed
In [ ]:
correlationRenamed = correlation.copy()
correlationRenamed.columns = pd.Series(correlation.columns).apply(lambda x: x + ' #' + str(correlation.columns.get_loc(x) + 1))
correlationRenamed.index = correlationRenamed.columns
correlationRenamed
In [ ]:
correlation.shape
In [ ]:
fig = plt.figure(figsize=(10,10))
ax12 = plt.subplot(111)
ax12.set_title('Heatmap')
sns.heatmap(correlation,ax=ax12,cmap=plt.cm.jet,square=True)
In [ ]:
ax = sns.clustermap(correlation,cmap=plt.cm.jet,square=True,figsize=(10,10),cbar_kws={\
"orientation":"vertical"})
In [ ]:
# Draw the correlation matrix as an image with numbered question labels.
questionsLabels = pd.Series(correlation.columns).apply(lambda x: x + ' #' + str(correlation.columns.get_loc(x) + 1))
fig = plt.figure(figsize=(10,10))
ax = plt.subplot(111)
cmap=plt.cm.jet
#cmap=plt.cm.ocean
cax = ax.imshow(correlation, interpolation='nearest', cmap=cmap)
plt.title('Questions\' Correlations')
# Fix the tick positions first, then attach the labels: tick labels are only
# reliable once the tick locations are fixed.
_tickPositions = np.arange(len(questionsLabels))
ax.set_xticks(_tickPositions)
ax.set_yticks(_tickPositions)
ax.set_xticklabels(questionsLabels, rotation='vertical')
ax.set_yticklabels(questionsLabels)
fig.colorbar(cax)
plt.show()
In [ ]:
ax.get_xticks()
In [ ]:
transposed = _result.T.astype(float)
transposed.head()
In [ ]:
transposed.corr()
In [ ]:
transposed.columns = range(0,len(transposed.columns))
transposed.index = range(0,len(transposed.index))
transposed.head()
In [ ]:
transposed = transposed.iloc[0:10,0:3]
transposed
In [ ]:
transposed = transposed.astype(float)
In [ ]:
type(transposed[0][0])
In [ ]:
transposed.columns = list('ABC')
transposed
In [ ]:
transposed.loc[0, 'A'] = 0
transposed
In [ ]:
transposed.corr()
# Rank correlation restricted to the first two columns. They are selected by
# position because the columns were relabelled 'A', 'B', 'C' above, so the
# integer labels 0 and 1 no longer exist.
data = transposed.iloc[:, [0, 1]]
data.corr(method = 'spearman')
In [ ]:
round(7.64684)
In [ ]:
df = pd.DataFrame(10*np.random.randint(2, size=[20,2]),index=range(0,20),columns=list('AB'))
#df.columns = range(0,len(df.columns))
df.head()
#type(df[0][0])
In [ ]:
type(df.columns)
In [ ]:
df.corr()
In [ ]:
#corr = pd.Series({}, index = methods)
for meth in methods:
#corr[meth] = result.corr(method = meth)
print(meth + ":\n" + str(transposed.corr(method = meth)) + "\n\n")
In [ ]:
In [ ]:
befores = gform.copy()
befores = befores[befores['Temporality'] == 'before']
print(len(befores))
allBeforesBinarized = getAllBinarized( _source = correctAnswers + demographicAnswers, _form = befores)
In [ ]:
np.unique(allBeforesBinarized.values.flatten())
In [ ]:
allBeforesBinarized.columns[20]
In [ ]:
allBeforesBinarized.T.dot(allBeforesBinarized)
In [ ]:
np.unique(allBeforesBinarized.iloc[:,20].values)
In [ ]:
plotCorrelationMatrix( allBeforesBinarized, _abs=False,\
_clustered=False, _questionNumbers=True )
In [ ]:
_correlation = allBeforesBinarized.astype(float).corr()
overlay = allBeforesBinarized.T.dot(allBeforesBinarized).astype(int)
_correlation.columns = pd.Series(_correlation.columns).apply(\
lambda x: x + ' #' + str(_correlation.columns.get_loc(x) + 1))
_correlation.index = _correlation.columns
_correlation = _correlation.abs()
_fig = plt.figure(figsize=(20,20))
_ax = plt.subplot(111)
#sns.heatmap(_correlation,ax=_ax,cmap=plt.cm.jet,square=True,annot=overlay,fmt='d')
sns.heatmap(_correlation,ax=_ax,cmap=plt.cm.jet,square=True,annot=True)
In [ ]:
afters = gform.copy()
afters = afters[afters['Temporality'] == 'after']
print(len(afters))
allAftersBinarized = getAllBinarized( _source = correctAnswers + demographicAnswers, _form = afters)
In [ ]:
np.unique(allAftersBinarized.values.flatten())
In [ ]:
plotCorrelationMatrix( allAftersBinarized, _abs=False,\
_clustered=False, _questionNumbers=True )
In [ ]:
#for answerIndex in range(0,len(allAftersBinarized)):
# print(str(answerIndex) + " " + str(allAftersBinarized.iloc[answerIndex,0]))
In [ ]:
allAftersBinarized.iloc[28,0]
In [ ]:
len(allAftersBinarized)
In [ ]:
len(allAftersBinarized.index)
In [ ]:
_correlation = allAftersBinarized.astype(float).corr()
overlay = allAftersBinarized.T.dot(allAftersBinarized).astype(int)
_correlation.columns = pd.Series(_correlation.columns).apply(\
lambda x: x + ' #' + str(_correlation.columns.get_loc(x) + 1))
_correlation.index = _correlation.columns
_fig = plt.figure(figsize=(10,10))
_ax = plt.subplot(111)
#sns.heatmap(_correlation,ax=_ax,cmap=plt.cm.jet,square=True,annot=overlay,fmt='d')
sns.heatmap(_correlation,ax=_ax,cmap=plt.cm.jet,square=True)
In [ ]:
crossCorrect = getCrossCorrectAnswers(allAftersBinarized)
In [ ]:
pd.Series((overlay == crossCorrect).values.flatten()).unique()
In [ ]:
allAftersBinarized.shape
In [ ]:
cross = allAftersBinarized.T.dot(allAftersBinarized)
cross.shape
In [ ]:
equal = (cross == crossCorrect)
type(equal)
In [ ]:
pd.Series(equal.values.flatten()).unique()
In [ ]:
testUser = userIDAnswersFR
In [ ]:
gform[gform[localplayerguidkey] == testUser].T
In [ ]:
getScore(testUser)
In [ ]:
# Draft of getScore( _userId, _form = gform ).
print("draft test")
testUserId = "3ef14300-4987-4b54-a56c-5b6d1f8a24a1"
testUserId = userIDAnswersEN

# One 'score' row with an empty list in each temporality column.
score = pd.DataFrame({}, columns = answerTemporalities)
score.loc['score',:] = np.nan
for column in score.columns:
    score.loc['score', column] = []

if not hasAnswered( testUserId ):
    print("user " + str(testUserId) + " has never answered")
else:
    columnAnswers = getCorrections(testUserId)
    for columnName in columnAnswers.columns:
        # only work on corrected columns
        if correctionsColumnNameStem not in columnName:
            continue
        # The temporality is read from the answers column matching this
        # corrections column.
        answerColumnName = columnName.replace(correctionsColumnNameStem,\
                                              answersColumnNameStem)
        temporality = columnAnswers.loc['Temporality',answerColumnName]
        # The score is the number of True cells in the corrections column.
        counts = (columnAnswers[columnName]).value_counts()
        thisScore = counts[True] if (True in counts) else 0
        score.loc['score',temporality].append(thisScore)
score
In [ ]:
score = pd.DataFrame({}, columns = answerTemporalities)
score.loc['score',:] = np.nan
for column in score.columns:
score.loc['score', column] = []
score
In [ ]:
#score.loc['user0',:] = [1,2,3]
In [ ]:
#score
In [ ]:
#type(score)
In [ ]:
#type(score[0])
In [ ]:
#for i,v in score[0].iteritems():
# print(v)
In [ ]:
#score[0]['undefined']
In [ ]:
#columnAnswers.loc['Temporality','answers0']
In [ ]:
False in (columnAnswers[columnName]).value_counts()
In [ ]:
getScore("3ef14300-4987-4b54-a56c-5b6d1f8a24a1")
In [ ]:
#gform[gform[localplayerguidkey]=="3ef14300-4987-4b54-a56c-5b6d1f8a24a1"].T
In [ ]:
correctAnswers
Theoretically, the questions answered correctly in the questionnaire and the checkpoints validated in the game should match: whoever understood an item should beat the matching challenge. Any discrepancies are due to game design or level design.
In [ ]:
#questionnaireValidatedCheckpointsPerQuestion = pd.Series(np.nan, index=range(35))
questionnaireValidatedCheckpointsPerQuestion = pd.Series(np.nan, index=range(len(checkpointQuestionMatching)))
questionnaireValidatedCheckpointsPerQuestion.head()
In [ ]:
checkpointQuestionMatching['checkpoint'][19]
In [ ]:
# Draft of getValidatedCheckpoints( userId, _form = gform ):
# builds, for each corrected questionnaire of the user, the sorted list of
# distinct checkpoints matched to correctly answered questions.
userId = localplayerguid
_form = gform

_validatedCheckpoints = []
if hasAnswered( userId, _form = _form ):
    _columnAnswers = getCorrections( userId, _form = _form)
    for _columnName in _columnAnswers.columns:
        # only work on corrected columns
        if correctionsColumnNameStem in _columnName:
            # Object dtype from the start: the Series only ever holds strings,
            # '' meaning "no checkpoint validated by this question".
            _questionnaireValidatedCheckpointsPerQuestion = pd.Series(
                '', index=range(len(checkpointQuestionMatching)), dtype=object)
            _questionCorrections = _columnAnswers[_columnName]
            for _index in range(0, len(_questionnaireValidatedCheckpointsPerQuestion)):
                # .iloc: questions are addressed by position here.
                # '== True' (not 'is True') is kept on purpose so that boolean
                # values that are not the Python singleton still match.
                if _questionCorrections.iloc[_index] == True:
                    _questionnaireValidatedCheckpointsPerQuestion[_index] = checkpointQuestionMatching['checkpoint'][_index]

            # Distinct, non-empty checkpoints, sorted and re-indexed from 0.
            _questionnaireValidatedCheckpoints = _questionnaireValidatedCheckpointsPerQuestion.unique()
            _questionnaireValidatedCheckpoints = _questionnaireValidatedCheckpoints[_questionnaireValidatedCheckpoints!='']
            _questionnaireValidatedCheckpoints = pd.Series(_questionnaireValidatedCheckpoints)
            _questionnaireValidatedCheckpoints = _questionnaireValidatedCheckpoints.sort_values()
            _questionnaireValidatedCheckpoints.index = range(0, len(_questionnaireValidatedCheckpoints))
            _validatedCheckpoints.append(_questionnaireValidatedCheckpoints)
else:
    print("user " + str(userId) + " has never answered")
result = pd.Series(data=_validatedCheckpoints)
In [ ]:
result
In [ ]:
type(result[0])
In [ ]:
testSeries1 = pd.Series(
[
'tutorial1.Checkpoint00',
'tutorial1.Checkpoint01',
'tutorial1.Checkpoint02',
'tutorial1.Checkpoint05'
]
)
testSeries2 = pd.Series(
[
'tutorial1.Checkpoint01',
'tutorial1.Checkpoint05'
]
)
np.setdiff1d(testSeries1, testSeries2)
np.setdiff1d(testSeries1.values, testSeries2.values)
In [ ]:
getAnswers(localplayerguid).head(2)
In [ ]:
getCorrections(localplayerguid).head(2)
In [ ]:
getScore(localplayerguid)
In [ ]:
getValidatedCheckpoints(localplayerguid)
In [ ]:
getNonValidatedCheckpoints(localplayerguid)
In [ ]:
qPlayedHerocoliIndex = 10
qPlayedHerocoliYes = ['Yes', 'Once', 'Multiple times', 'Oui',
'De nombreuses fois', 'Quelques fois', 'Une fois']
questionIndex = qPlayedHerocoliIndex
choice = qPlayedHerocoliYes
_form = gform
# returns all rows of Google form's answers that contain an element
# of the array 'choice' for question number 'questionIndex'
#def getAllAnswerRows(questionIndex, choice, _form = gform ):
_form[_form.iloc[:, questionIndex].isin(choice)]
In [ ]:
# Draft of getPercentCorrectPerColumn(_df):
# for each evaluation question, the share of rows of _df whose answer starts
# with an accepted answer; other columns stay NaN. A 'Count' entry holding the
# number of rows is appended.
_df = getAllAnswerRows(qPlayedHerocoliIndex, qPlayedHerocoliYes, _form = gform )
_count = len(_df)
_percents = pd.Series(np.full(len(_df.columns), np.nan), index=_df.columns)
for _columnIndex, _columnName in enumerate(_df.columns):
    # Only the columns from firstEvaluationQuestionIndex up to (excluding) the
    # last three are evaluated.
    if (_columnIndex < firstEvaluationQuestionIndex) \
            or (_columnIndex >= len(_df.columns)-3):
        continue
    # NOTE(review): other cells of this notebook iterate over the entries of
    # correctAnswers as collections of accepted answers; a scalar entry is
    # wrapped so that it is handled too -- confirm the actual structure.
    _acceptedAnswers = correctAnswers.iloc[_columnIndex]
    if isinstance(_acceptedAnswers, str) or not hasattr(_acceptedAnswers, '__iter__'):
        _acceptedAnswers = [_acceptedAnswers]
    # str.startswith accepts a tuple of prefixes; an empty tuple never matches.
    _acceptedPrefixes = tuple(str(_acceptedAnswer) for _acceptedAnswer in _acceptedAnswers)
    _correctCount = 0
    for _givenAnswer in _df[_columnName]:
        if str(_givenAnswer).startswith(_acceptedPrefixes):
            _correctCount += 1
    # With no row at all the column is left as NaN.
    if _count > 0:
        _percents[_columnName] = _correctCount
_percents = _percents/_count
_percents['Count'] = _count
print('\n\n\npercents=\n' + str(_percents))
In [ ]:
questionIndex = qPlayedHerocoliIndex
choice = qPlayedHerocoliYes
_form = gform
#def getPercentCorrectKnowingAnswer(questionIndex, choice, _form = gform):
_answerRows = getAllAnswerRows(questionIndex, choice, _form = _form);
getPercentCorrectPerColumn(_answerRows)
In [ ]:
#localplayerguid = '8d352896-a3f1-471c-8439-0f426df901c1'
#localplayerguid = '7037c5b2-c286-498e-9784-9a061c778609'
#localplayerguid = '5c4939b5-425b-4d19-b5d2-0384a515539e'
#localplayerguid = '7825d421-d668-4481-898a-46b51efe40f0'
#localplayerguid = 'acb9c989-b4a6-4c4d-81cc-6b5783ec71d8'
# Print, for every responder, the first two rows of their answers and
# corrections, their scores and their checkpoint counts.
# The loop variable is not named `id`: in a notebook it would stay defined
# after the loop and shadow the builtin id() for every following cell.
for responderId in getAllResponders():
    print("===========================================")
    print("id=" + str(responderId))
    print("-------------------------------------------")
    print(getAnswers(responderId).head(2))
    print("-------------------------------------------")
    print(getCorrections(responderId).head(2))
    print("-------------------------------------------")
    print("scores=" + str(getScore(responderId)))
    print("#ValidatedCheckpoints=" + str(getValidatedCheckpointsCounts(responderId)))
    print("#NonValidatedCheckpoints=" + str(getNonValidatedCheckpointsCounts(responderId)))
    print("===========================================")
In [ ]:
gform[localplayerguidkey]
hasAnswered( '8d352896-a3f1-471c-8439-0f426df901c1' )
'8d352896-a3f1-471c-8439-0f426df901c1' in gform[localplayerguidkey].values
In [ ]:
apostropheTestString = 'it\'s a test'
apostropheTestString
In [ ]:
In [ ]:
#gformEN.head(2)
In [ ]:
#gformFR.head(2)
In [ ]:
#gformEN['Language'] = pd.Series('en', index=gformEN.index)
#gformFR['Language'] = pd.Series('fr', index=gformFR.index)
In [ ]:
#gformFR.head(2)
In [ ]:
# rename columns
#gformFR.columns = gformEN.columns
#gformFR.head(2)
In [ ]:
#gformTestMerge = pd.concat([gformEN, gformFR])
In [ ]:
#gformTestMerge.head(2)
In [ ]:
#gformTestMerge.tail(2)
In [ ]:
gform
In [ ]:
localplayerguid
In [ ]:
someAnswers = getAnswers( '8ca16c7a-70a6-4723-bd72-65b8485a2e86' )
someAnswers
In [ ]:
testQuestionIndex = 24
In [ ]:
thisUsersFirstEvaluationQuestion = str(someAnswers[someAnswers.columns[0]][testQuestionIndex])
thisUsersFirstEvaluationQuestion
In [ ]:
someAnswers[someAnswers.columns[0]]['Language']
In [ ]:
firstEvaluationQuestionCorrectAnswer = str(correctAnswers[testQuestionIndex])
firstEvaluationQuestionCorrectAnswer
In [ ]:
thisUsersFirstEvaluationQuestion.startswith(firstEvaluationQuestionCorrectAnswer)