Purpose: determine to what extent the current data can accurately describe correlations and underlying factors that influence the score. In particular, for the answerTemporalities[0] groups: are there underlying groups that explain the discrepancies in score? Are those groups tied to certain questions?
In [ ]:
%run "../Functions/2. Google form analysis.ipynb"
In [ ]:
# Restrict the analysis to the rows returned by getPerfectPretestPostestPairs.
# NOTE(review): presumably respondents with a matching pretest/posttest pair -- confirm in the functions notebook.
form = getPerfectPretestPostestPairs(gform)
In [ ]:
# Binarized answers table: rows are counted as answers and columns are used as question labels below.
# NOTE(review): presumably 1 = correct / 0 = incorrect per cell -- confirm against getAllBinarized.
binarized = getAllBinarized()
In [ ]:
# Total score per question over every row of `binarized`, plus that total
# expressed as a percentage of the number of rows.
answersCount = binarized.shape[0]

# Left-multiplying by a vector of ones adds the rows together, giving one total per column.
totalScorePerQuestion = pd.DataFrame(
    data=np.dot(np.ones(answersCount), binarized),
    index=binarized.columns,
    columns=['score'],
)
totalScorePerQuestion['perc'] = totalScorePerQuestion['score'] * 100 / answersCount

# Lowest-scoring questions first.
display(totalScorePerQuestion.sort_values(by='score'))
In [ ]:
# Two-sample t-test on the scores of biologists versus all other rows of `form`.
# NOTE(review): ttest_ind defaults to equal_var=True (Student's t-test); the np.std
# cells that follow look like a check of that assumption -- confirm it holds.

# Group 1: rows selected by getSurveysOfBiologists (default policy).
biologists = getSurveysOfBiologists(form)
biologistsScores = biologists.apply(getGFormRowScore, axis=1)

# Group 2: every remaining row of `form`.
nonBiologists = form.drop(biologists.index)
nonBiologistsScores = nonBiologists.apply(getGFormRowScore, axis=1)

ttest = ttest_ind(biologistsScores, nonBiologistsScores)
ttest
In [ ]:
# Inspect the raw biologist scores that were passed to the t-test.
biologistsScores.values
In [ ]:
# Spread of the biologist scores; ddof=0 is np.std's default (population standard deviation).
np.std(biologistsScores, ddof=0)
In [ ]:
# Spread of the non-biologist scores; ddof=0 is np.std's default (population standard deviation).
np.std(nonBiologistsScores, ddof=0)
Conclusion: the two groups have distinct scores.
In [ ]:
# Same biologist / non-biologist comparison, restricted to the rows returned by getGFormBefores.
# NOTE(review): this cell selects biologists with hardPolicy=False, whereas the
# comparison on the full `form` uses the default policy -- confirm the difference is intended.
# NOTE(review): ttest_ind defaults to equal_var=True (Student's t-test) -- confirm it is appropriate.
gfBefores = getGFormBefores(form)

# Group 1: biologists among the "before" rows.
biologistsBefores = getSurveysOfBiologists(gfBefores, hardPolicy = False)
biologistsBeforesScores = biologistsBefores.apply(getGFormRowScore, axis=1)

# Group 2: every remaining "before" row.
nonBiologistsBefores = gfBefores.drop(biologistsBefores.index)
nonBiologistsBeforesScores = nonBiologistsBefores.apply(getGFormRowScore, axis=1)

ttest = ttest_ind(biologistsBeforesScores, nonBiologistsBeforesScores)
ttest
In [ ]:
# Spread of the biologists' "before" scores; ddof=0 is np.std's default (population standard deviation).
np.std(biologistsBeforesScores, ddof=0)
In [ ]:
# Inspect the non-biologists' "before" scores that were passed to the t-test.
nonBiologistsBeforesScores
In [ ]:
# Spread of the non-biologists' "before" scores; ddof=0 is np.std's default (population standard deviation).
np.std(nonBiologistsBeforesScores, ddof=0)