In [ ]:
from pySankey import sankey
%run "../Functions/6. Time analysis.ipynb"
print("7. Question groups variation analysis")
In [ ]:
def analyseQuestion(allData, q):
    """Print summary statistics for one question and box-plot its variation.

    Three rows of ``allData`` are read with ``allData.loc[label, :]``, where
    ``label`` is ``"<prefix> <q>"`` and the prefix is, in turn,
    ``answerTemporalities[0]``, ``answerTemporalities[1]`` and ``deltaPrefix``
    (names defined outside this block).

    The mean and standard deviation of each row are printed, then a box plot
    of the ``deltaPrefix`` row is shown. Nothing is returned.
    """
    def scoresFor(prefix):
        # Row of allData holding the scores stored under "<prefix> <q>".
        return allData.loc[prefix + " " + q, :]

    before = scoresFor(answerTemporalities[0])
    after = scoresFor(answerTemporalities[1])
    variation = scoresFor(deltaPrefix)

    variationSummary = (variation.mean(), variation.std())
    print("variation: %0.2f (+/- %0.2f)" % variationSummary)

    beforeAfterSummary = (before.mean(), before.std(), after.mean(), after.std())
    print("from %0.2f (+/- %0.2f) to %0.2f (+/- %0.2f)" % beforeAfterSummary)

    plt.boxplot(variation)
    plt.show()
In [ ]:
# questionsCoding contains points attributed to each answer
def compareUsingCustomCorrection(gfdf, questions, questionsCoding):
    """Grade pretest and posttest answers with a custom coding and compute deltas.

    Parameters:
        gfdf: frame of answers; it is filtered on its ``QTemporality`` column
            and re-indexed by its ``QUserId`` column (both names are defined
            outside this block).
        questions: column labels of ``gfdf`` to grade.
        questionsCoding: one dictionary per entry of ``questions`` (paired
            positionally), mapping each possible answer to its points.

    Returns:
        The transposed result frame: one row per ``"<prefix> <question>"``
        label, with the ``deltaPrefix`` rows first (posttest minus pretest),
        then the ``answerTemporalities[0]`` rows, then the
        ``answerTemporalities[1]`` rows; one column per user id.

    Raises:
        KeyError: if an answer is not a key of the matching coding dictionary.

    The range of achievable total scores is printed as a side effect.
    """
    minPotentialScore = 0
    maxPotentialScore = 0
    for gradingDictionary in questionsCoding:
        points = gradingDictionary.values()
        minPotentialScore += min(points)
        maxPotentialScore += max(points)
    print("%s < score < %s" % (minPotentialScore, maxPotentialScore))

    def gradedAnswers(temporality):
        # Explicit copies: the index and column assignments below must never
        # be chained onto a filtered selection of the caller's frame.
        answers = gfdf[gfdf[QTemporality] == temporality].copy()
        answers.index = answers[QUserId]
        # only keep relevant questions
        answers = answers.loc[:, questions].copy()
        # code the answers
        for question, coding in zip(questions, questionsCoding):
            answers[question] = answers[question].apply(
                lambda answer, coding=coding: coding[answer]
            )
        return answers

    gfdfPretest = gradedAnswers(answerTemporalities[0])
    gfdfPostest = gradedAnswers(answerTemporalities[1])

    # compute delta; rows are aligned on the user id index
    gfdfResult = gfdfPostest - gfdfPretest
    gfdfResult.columns = [deltaPrefix + " " + q for q in questions]

    # Columns are appended one at a time: assigning a whole frame to a list of
    # not-yet-existing column labels is not supported by every pandas version.
    for q in questions:
        gfdfResult[answerTemporalities[0] + " " + q] = gfdfPretest[q]
    for q in questions:
        gfdfResult[answerTemporalities[1] + " " + q] = gfdfPostest[q]

    return gfdfResult.T
In [ ]:
def getScoresOnQuestionsFromAllData(allData, Qs):
    """Sum the scores of several questions, separately per temporality.

    For each of the prefixes ``answerTemporalities[0]``,
    ``answerTemporalities[1]`` and ``deltaPrefix`` (defined outside this
    block), the rows ``"<prefix> <q>"`` for every ``q`` in ``Qs`` are selected
    from ``allData`` and summed with ``.sum()``.

    Returns the tuple ``(pretestScores, posttestScores, deltaScores)``.
    """
    def rowLabels(prefix):
        # Row labels of allData for every question under the given prefix.
        return [prefix + " " + q for q in Qs]

    pretestRows = rowLabels(answerTemporalities[0])
    posttestRows = rowLabels(answerTemporalities[1])
    deltaRows = rowLabels(deltaPrefix)

    summedPretest = allData.loc[pretestRows, :].sum()
    summedPosttest = allData.loc[posttestRows, :].sum()
    summedDelta = allData.loc[deltaRows, :].sum()
    return (summedPretest, summedPosttest, summedDelta)
In [ ]:
def analyseQuestionGroup(
    gfdf,
    Qs,
    grading,
    plotGraphs = True,
    printData = True,
    saveFiles = False,
    title = "",
    qualitativeCoding = None,
    ):
    """Grade a group of questions and report on the group's summed score.

    The answers in ``gfdf`` to the questions ``Qs`` are graded with
    ``grading`` through ``compareUsingCustomCorrection``; the per-question
    scores are summed by ``getScoresOnQuestionsFromAllData``; the three
    resulting rows (delta, pretest, posttest) are stored under the pseudo
    question "question group" and handed to ``plotPretestPosttestDeltaGfdf``.

    ``plotGraphs``, ``printData``, ``saveFiles``, ``title`` and
    ``qualitativeCoding`` are forwarded unchanged to
    ``plotPretestPosttestDeltaGfdf``. Nothing is returned.
    """
    perQuestionScores = compareUsingCustomCorrection(gfdf, Qs, grading)
    groupPretest, groupPosttest, groupDelta = getScoresOnQuestionsFromAllData(perQuestionScores, Qs)

    groupName = "question group"
    pretestLabel = answerTemporalities[0] + " " + groupName
    posttestLabel = answerTemporalities[1] + " " + groupName
    deltaLabel = deltaPrefix + " " + groupName

    # One row per kind of score, one column per entry of the summed series.
    groupScores = pd.DataFrame(
        data = [groupDelta, groupPretest, groupPosttest],
        columns = groupDelta.index,
        index = [deltaLabel, pretestLabel, posttestLabel],
    )

    plotPretestPosttestDeltaGfdf(
        groupScores,
        [groupName],
        plotGraphs = plotGraphs,
        printData = printData,
        saveFiles = saveFiles,
        title = title,
        qualitativeCoding = qualitativeCoding,
    )
In [ ]:
def getReorderedByPrefix(prefixes, prefixed):
    """Return the elements of ``prefixed`` grouped in the order of ``prefixes``.

    Parameters:
        prefixes: iterable of strings giving the wanted order.
        prefixed: re-iterable collection of strings to reorder.

    Returns:
        A list holding, for each prefix in turn, every element of ``prefixed``
        that starts with it (in their order of appearance in ``prefixed``).
        Each element is returned at most once, under the first prefix it
        matches. Elements matching no prefix are left out.
    """
    result = []
    alreadyPlaced = set()
    for prefix in prefixes:
        for candidate in prefixed:
            # Keep every match of the prefix, not only the first one, and do
            # not repeat an element when prefixes repeat or overlap.
            if candidate in alreadyPlaced or not candidate.startswith(prefix):
                continue
            alreadyPlaced.add(candidate)
            result.append(candidate)
    return result
In [ ]:
def plotPretestPosttestDeltaGfdf(allData,
                                 questions,
                                 plotGraphs = True,
                                 printData = True,
                                 saveFiles = False,
                                 title = "",
                                 suffix = "",
                                 fontsize=10,
                                 qualitativeCoding = None):
    """Print statistics and draw score plots for each question.

    For every ``q`` in ``questions`` the rows ``deltaPrefix + " " + q``,
    ``answerTemporalities[0] + " " + q`` and ``answerTemporalities[1] + " " + q``
    of ``allData`` are read (these prefixes are defined outside this block).

    Parameters:
        allData: frame with one row per ``"<prefix> <question>"`` label; its
            number of columns is printed as the sample size.
        questions: iterable of question labels.
        plotGraphs: when true, draw a histogram of the variation, overlaid
            histograms of pretest and posttest scores, and a Sankey diagram
            of the pretest -> posttest transitions.
        printData: when true, print the question, mean and standard deviation
            of the three rows, and the result of ``ttest_ind``.
        saveFiles: when true, save each figure under its title with the
            double quotes removed.
        title: when non-empty, used instead of the quoted question as the
            stem of the figure titles; ``suffix`` is then not appended.
        suffix: text appended to titles built from the question.
        fontsize: forwarded to ``sankey.sankey``.
        qualitativeCoding: optional dictionary mapping a score to a label used
            in the Sankey diagram. It is ignored for a question whose pretest
            or posttest scores are not all keys of the dictionary.

    Returns nothing.
    """
    variationSuffix = ' - variation'
    pretestPosttestSuffix = ' - pretest posttest'
    sankeySuffix = ' - Sankey'
    qualitativeSuffix = ' - qualitative'

    # sample size
    print("n = " + str(len(allData.columns)))
    print()
    print()

    for q in questions:
        deltaScores = allData.loc[deltaPrefix + " " + q, :]
        pretestScores = allData.loc[answerTemporalities[0] + " " + q, :]
        posttestScores = allData.loc[answerTemporalities[1] + " " + q, :]

        # Decide per question whether the qualitative coding is usable. A
        # local name is used so that one question with uncoded scores does not
        # switch the coding off for the questions that follow it.
        coding = qualitativeCoding
        if coding is not None:
            pretestCoded = all(score in coding for score in pretestScores.values)
            posttestCoded = all(score in coding for score in posttestScores.values)
            if not (pretestCoded and posttestCoded):
                coding = None

        if printData:
            print(q)
            print("variation: %0.2f (+/- %0.2f)" % (deltaScores.mean(), deltaScores.std()))
            print("from %0.2f (+/- %0.2f) to %0.2f (+/- %0.2f)" % \
                (pretestScores.mean(), pretestScores.std(),\
                 posttestScores.mean(), posttestScores.std(),))
            # NOTE(review): ttest_ind treats both samples as independent while
            # the scores look paired per user - confirm this is intended.
            print(ttest_ind(pretestScores, posttestScores))

        if plotGraphs:
            # histogram of the score variation, one bin per unit of score
            fig = plt.figure()
            plt.subplot(111)
            plt.hist(deltaScores, bins=int(max(deltaScores) - min(deltaScores) + 1), figure = fig)
            if len(title) == 0:
                _title = '"' + q + '"' + variationSuffix + suffix
            else:
                _title = title + variationSuffix
            plt.title(_title)
            plt.xlabel("score variation")
            plt.ylabel("count")
            plt.show()
            if saveFiles:
                fig.savefig(_title.replace('"', ""))

            # overlaid histograms of pretest and posttest scores
            fig = plt.figure()
            plt.subplot(111)
            plt.hist(pretestScores, bins=int(max(pretestScores) - min(pretestScores) + 1), label='pretest', alpha=0.5, figure = fig)
            plt.hist(posttestScores, bins=int(max(posttestScores) - min(posttestScores) + 1), label='posttest', alpha=0.5, figure = fig)
            plt.legend()
            if len(title) == 0:
                _title = '"' + q + '"' + pretestPosttestSuffix + suffix
            else:
                _title = title + pretestPosttestSuffix
            plt.title(_title)
            plt.xlabel("score")
            plt.ylabel("count")
            plt.show()
            if saveFiles:
                fig.savefig(_title.replace('"', ""))

            # Sankey diagram: one flow per existing "pretest->posttest" pair
            # of (integer-truncated) scores, weighted by the number of users.
            transitionCounts = {}
            transitionLabels = {}
            for userId in pretestScores.index:
                pretestScore = pretestScores[userId]
                posttestScore = posttestScores[userId]
                pretestCode = "{0:0=2d}".format(int(pretestScore))
                posttestCode = "{0:0=2d}".format(int(posttestScore))
                changeIndex = pretestCode + "->" + posttestCode

                transitionCounts[changeIndex] = transitionCounts.get(changeIndex, 0) + 1

                if coding is not None:
                    pretestName = coding[int(pretestScore)]
                    posttestName = coding[int(posttestScore)]
                else:
                    pretestName = pretestCode
                    posttestName = posttestCode

                # Each label ends with the number of users sharing that score.
                pretestSize = int((pretestScores == pretestScore).sum())
                posttestSize = int((posttestScores == posttestScore).sum())
                # The last user seen for a transition decides its labels.
                transitionLabels[changeIndex] = (
                    pretestName + " (" + "{0:0=2d}".format(pretestSize) + ")",
                    posttestName + " (" + "{0:0=2d}".format(posttestSize) + ")",
                )

            classesDF = pd.DataFrame(
                list(transitionLabels.values()),
                index = list(transitionLabels.keys()),
                columns = ['pretest', 'posttest'],
            )

            # Labels and weights are both sorted by transition key so that
            # they stay aligned position by position.
            left = classesDF['pretest'].sort_index().values
            right = classesDF['posttest'].sort_index().values
            leftWeight = pd.Series(transitionCounts, dtype=float).sort_index().values
            rightWeight = leftWeight

            if coding is not None:
                leftLabels = getReorderedByPrefix(coding.values(), classesDF['pretest'].unique())
                rightLabels = getReorderedByPrefix(coding.values(), classesDF['posttest'].unique())
            else:
                leftLabels = sorted(classesDF['pretest'].unique())
                rightLabels = sorted(classesDF['posttest'].unique())

            sankeyKind = sankeySuffix
            if coding is not None:
                sankeyKind += qualitativeSuffix
            if len(title) == 0:
                _title = '"' + q + '"' + sankeyKind + suffix
            else:
                _title = title + sankeyKind

            if saveFiles:
                filename = _title.replace('"', "")
            else:
                filename = None

            sankey.sankey(
                left=left,
                right=right,
                leftWeight=leftWeight,
                rightWeight=rightWeight,
                leftLabels=leftLabels,
                rightLabels=rightLabels,
                aspect=20,
                fontsize=fontsize,
                figureName=filename,
                title=_title,
            )

        if printData:
            print()
            print()
            print()
In [ ]:
def getDeviceQuestionsGrading(correctAnsCost,halfCorAnsCost,dontKnoAnsCost,incorreAnsCost,):
    """Build the answer -> points dictionaries of the eight device questions.

    The keys of every dictionary are the first six entries of
    ``DeviceAnswersPossibleAnswersEN`` (defined outside this block); the
    sixth entry always receives ``dontKnoAnsCost``.

    Returns a list of eight independent dictionaries, in the order given by
    the comments in the return statement.
    """
    answers = [DeviceAnswersPossibleAnswersEN[position] for position in range(6)]

    def coding(*costsOfFirstFiveAnswers):
        # Pair the six answers with the five given costs plus the
        # "don't know" cost for the last answer.
        return dict(zip(answers, costsOfFirstFiveAnswers + (dontKnoAnsCost,)))

    def onlyFirstAnswerCorrect():
        # A fresh dictionary on every call, so the returned codings stay
        # independent objects.
        return coding(correctAnsCost, incorreAnsCost, incorreAnsCost, incorreAnsCost, incorreAnsCost)

    return [
        # QDeviceRbsPconsFlhdcTer
        onlyFirstAnswerCorrect(),
        # QDevicePconsRbsFlhdcTer
        coding(incorreAnsCost, halfCorAnsCost, halfCorAnsCost, correctAnsCost, halfCorAnsCost),
        # QDevicePbadRbsGfpTer
        coding(incorreAnsCost, halfCorAnsCost, correctAnsCost, halfCorAnsCost, halfCorAnsCost),
        # QDevicePbadGfpRbsTer
        onlyFirstAnswerCorrect(),
        # QDeviceGfpRbsPconsTer
        onlyFirstAnswerCorrect(),
        # QDevicePconsGfpRbsTer
        onlyFirstAnswerCorrect(),
        # QDeviceAmprRbsPconsTer
        onlyFirstAnswerCorrect(),
        # QDeviceRbsPconsAmprTer
        onlyFirstAnswerCorrect(),
    ]
In [ ]:
def getBioBrickFunctionsQuestionsGrading(correctAnsCost,halfCorAnsCost,dontKnoAnsCost,incorreAnsCost,):
    """Build the answer -> points dictionaries of the five BioBrick function questions.

    The keys of every dictionary are the first seven entries of
    ``BioBrickAnswersPossibleAnswersEN`` (defined outside this block). In each
    dictionary one of the entries 1 to 4 gets ``correctAnsCost`` and the other
    three get ``halfCorAnsCost``; entries 0 and 5 get ``incorreAnsCost`` and
    entry 6 gets ``dontKnoAnsCost``.

    Returns a list of five independent dictionaries, in the order given by
    the comments in the return statement.
    """
    answers = [BioBrickAnswersPossibleAnswersEN[position] for position in range(7)]

    # Positions in the answer list, named after the original comments:
    # "None of these", "TER", "PR", "CDS", "RBS", "Plasmid", "I don't know".
    TER, PR, CDS, RBS = 1, 2, 3, 4

    def codingWithCorrect(correctPosition):
        costs = [
            incorreAnsCost,
            halfCorAnsCost,
            halfCorAnsCost,
            halfCorAnsCost,
            halfCorAnsCost,
            incorreAnsCost,
            dontKnoAnsCost,
        ]
        costs[correctPosition] = correctAnsCost
        return dict(zip(answers, costs))

    return [
        # QBBFunctionTER
        codingWithCorrect(TER),
        # QBBFunctionGameCDS
        codingWithCorrect(CDS),
        # QBBFunctionBiologyCDS
        codingWithCorrect(CDS),
        # QBBFunctionPR
        codingWithCorrect(PR),
        # QBBFunctionRBS
        codingWithCorrect(RBS),
    ]
In [ ]:
def getQGenotypePhenotypeGrading(correctAnsCost,halfCorAnsCost,dontKnoAnsCost,incorreAnsCost,):
    """Build the answer -> points dictionary of the genotype/phenotype question.

    Keys are the first five entries of ``QGenotypePhenotypePossibleAnswersEN``
    (defined outside this block) plus the literal answer 'Gather nanorobots',
    which is graded like entry 1.

    Returns a one-element list holding that dictionary.
    """
    possibleAnswers = QGenotypePhenotypePossibleAnswersEN

    # Answer texts as noted by the original author, in key order:
    #   'Edit the DNA of the bacterium', 'Gather nanobots',
    #   'Gather nanorobots', 'Move the bacterium', 'Divide the bacterium',
    #   "I don't know"
    pointsPerAnswer = [
        (possibleAnswers[0], correctAnsCost),
        (possibleAnswers[1], incorreAnsCost),
        ('Gather nanorobots', incorreAnsCost),
        (possibleAnswers[2], halfCorAnsCost),
        (possibleAnswers[3], incorreAnsCost),
        (possibleAnswers[4], dontKnoAnsCost),
    ]
    return [dict(pointsPerAnswer)]
In [ ]:
def getInductionQuestionsGrading(correctAnsCost,halfCorAnsCost,dontKnoAnsCost,incorreAnsCost,):
    """Build the answer -> points dictionaries of the nine induction questions.

    The first eight dictionaries are keyed by entries 0 to 4 of
    ``DeviceAnswersPossibleAnswersEN``; the ninth is keyed by entries 0 to 3
    of ``QDevicePbadRbsAraTerPossibleAnswersEN``. Every dictionary also maps
    entry 5 of ``DeviceAnswersPossibleAnswersEN`` to ``dontKnoAnsCost``. Both
    answer lists are defined outside this block.

    Returns a list of nine independent dictionaries, in the order given by
    the comments in the return statement.
    """
    deviceAnswers = [DeviceAnswersPossibleAnswersEN[position] for position in range(5)]
    arabinoseAnswers = [QDevicePbadRbsAraTerPossibleAnswersEN[position] for position in range(4)]
    dontKnowAnswer = DeviceAnswersPossibleAnswersEN[5]

    # Answer texts noted by the original author (presumably the content of one
    # of the answer lists above - verify against their definition):
    #   'The bricks are not well-ordered',
    #   'It generates green fluorescence',
    #   'It generates green fluorescence in presence of arabinose inducer',
    #   'It makes it possible to move faster',
    #   'It generates antibiotic resistance'

    def coding(answers, costs):
        # Pair answers with costs, then add the "don't know" answer last.
        table = dict(zip(answers, costs))
        table[dontKnowAnswer] = dontKnoAnsCost
        return table

    def thirdAnswerIncorrect():
        # Shared by six questions: every answer is half correct except
        # entry 2, which is incorrect.
        return coding(
            deviceAnswers,
            (halfCorAnsCost, halfCorAnsCost, incorreAnsCost, halfCorAnsCost, halfCorAnsCost),
        )

    return [
        # QDeviceRbsPconsFlhdcTer
        thirdAnswerIncorrect(),
        # QDevicePconsRbsFlhdcTer
        thirdAnswerIncorrect(),
        # QDeviceGfpRbsPconsTer
        thirdAnswerIncorrect(),
        # QDevicePconsGfpRbsTer
        thirdAnswerIncorrect(),
        # QDeviceAmprRbsPconsTer
        thirdAnswerIncorrect(),
        # QDeviceRbsPconsAmprTer
        thirdAnswerIncorrect(),
        # QDevicePbadRbsGfpTer
        coding(
            deviceAnswers,
            (halfCorAnsCost, incorreAnsCost, correctAnsCost, halfCorAnsCost, halfCorAnsCost),
        ),
        # QDevicePbadGfpTbsTer
        coding(
            deviceAnswers,
            (halfCorAnsCost, halfCorAnsCost, halfCorAnsCost, halfCorAnsCost, halfCorAnsCost),
        ),
        # QDevicePbadRbsAraTer
        coding(
            arabinoseAnswers,
            (halfCorAnsCost, halfCorAnsCost, correctAnsCost, incorreAnsCost),
        ),
    ]
In [ ]:
def getQuestionsGradingSubset(allQuestions, questionsSubset, grading):
    """Select the grading entries matching a subset of the questions.

    Parameters:
        allQuestions: iterable of (hashable) questions, positionally paired
            with ``grading``.
        questionsSubset: iterable of questions to keep; the output follows
            its order.
        grading: sequence indexed by the position of a question in
            ``allQuestions``.

    Returns:
        The list ``[grading[p] for each question of questionsSubset]`` where
        ``p`` is the position of the first occurrence of that question in
        ``allQuestions``.

    Raises:
        ValueError: if a question of the subset is not in ``allQuestions``.
    """
    # One pass over allQuestions instead of one linear search per requested
    # question; setdefault keeps the first position of a repeated question.
    firstPosition = {}
    for position, question in enumerate(allQuestions):
        firstPosition.setdefault(question, position)

    positions = []
    for question in questionsSubset:
        if question not in firstPosition:
            raise ValueError("%r is not in allQuestions" % (question,))
        positions.append(firstPosition[question])

    return [grading[position] for position in positions]
In [ ]: