In [1]:
import pandas as pd
import numpy as np
from matplotlib import pylab as plt
In [2]:
# Relative path of the grade sheet CSV (resolved against the working directory).
gradefile = "cs124.csv"
# Load the sheet into the notebook-wide `grades` frame; later cells filter it
# by its `Section` column.
grades = pd.read_csv(gradefile)
In [47]:
# Number of rows whose Section is anything other than 'COLGSAS COMPSCI 124'.
int((grades.Section != 'COLGSAS COMPSCI 124').sum())
Out[47]:
In [101]:
def getpset(pset, extension=False, data=None):
    """Select one problem set's score columns for one group of students.

    Parameters
    ----------
    pset : str
        Regular expression handed to ``DataFrame.filter(regex=...)``; every
        column whose name matches is kept.
    extension : bool, optional
        ``False`` (default) keeps the rows whose ``Section`` equals
        ``'COLGSAS COMPSCI 124'``.  ``True`` keeps every other row and then
        drops rows whose last selected column equals 0 (the original author's
        stated assumption is that those students dropped the class).
    data : pandas.DataFrame, optional
        Grade table with a ``Section`` column.  Defaults to the notebook-level
        ``grades`` frame, so existing one- and two-argument calls are unchanged.

    Returns
    -------
    pandas.DataFrame
        The matching columns, with any parenthesised text removed from the
        column names and every value coerced to a number (cells that cannot
        be parsed become NaN).

    Raises
    ------
    AssertionError
        If the section filter kept every row of the table.
    """
    import re

    if data is None:
        data = grades

    main_section = 'COLGSAS COMPSCI 124'
    if extension:
        subset = data[data.Section != main_section]
    else:
        subset = data[data.Section == main_section]
    # Sanity check kept from the original: the filter must exclude something.
    assert len(subset) != len(data), "section filter did not exclude any rows"

    # Make the column names pretty: drop "(...)" fragments.
    subset = subset.filter(regex=pset)
    subset.columns = [re.sub(r'\([^)]*\)', '', name) for name in subset.columns]

    # `convert_objects(convert_numeric=True)` is gone from current pandas;
    # `to_numeric(errors='coerce')` is its documented replacement.
    subset = subset.apply(pd.to_numeric, errors='coerce')

    # Positional access avoids an IndexError when nothing matched and stays a
    # Series even if stripping parentheses produced duplicate column names.
    if extension and subset.shape[1] > 0:
        last_column = subset.iloc[:, -1]
        subset = subset[last_column != 0]
    return subset
In [102]:
# Build the four score tables: problem sets 5 and 6, each once for the
# 'COLGSAS COMPSCI 124' section (extension=False) and once for everyone else
# (extension=True, suffix "e").
pset5, pset5e = (getpset("Problem Set 5|Pset 5", extension=flag) for flag in (False, True))
pset6, pset6e = (getpset("Problem Set 6|Pset 6", extension=flag) for flag in (False, True))
In [104]:
# Row count of the pset6e table.
pset6e.shape[0]
Out[104]:
In [105]:
# Pairwise covariance of the first five selected columns of each score table.
# `.iloc` is the positional indexer; the `.ix` indexer used previously has been
# removed from pandas.
# NOTE(review): the variables are named "corr" but `.cov()` yields covariances;
# confirm whether `.corr()` (or the raw columns) was intended for the plots.
corr6 = pset6.iloc[:, :5].cov()
corr5 = pset5.iloc[:, :5].cov()
corr5e = pset5e.iloc[:, :5].cov()
corr6e = pset6e.iloc[:, :5].cov()
corr6.head()
Out[105]:
In [106]:
%matplotlib
import seaborn as sns
def make_corr_plot(d, title="plot"):
    """Plot the pairwise correlation of ``d``'s columns and save the figure.

    The removed ``sns.corrplot`` helper computed the column-wise correlation
    of its input and drew the lower triangle; this does the same with
    ``sns.heatmap``, which is the supported replacement.

    Parameters
    ----------
    d : pandas.DataFrame or 2-D array-like
        Rectangular data with one variable per column.
        NOTE(review): the notebook passes covariance matrices here, so the
        plot shows the correlation between that matrix's columns; confirm
        whether the raw score columns were meant instead.
    title : str, optional
        Used both as the axes title and as the file name given to ``savefig``.
    """
    corr = pd.DataFrame(d).corr()
    # Hide the diagonal and the upper triangle so each pair is shown once.
    hidden = np.triu(np.ones(corr.shape, dtype=bool))

    f, ax = plt.subplots(figsize=(9, 9))
    cmap = sns.diverging_palette(220, 10, as_cmap=True)
    # Fixed symmetric limits keep 0 at the centre of the diverging palette.
    sns.heatmap(corr, mask=hidden, cmap=cmap, vmin=-1, vmax=1,
                annot=False, square=True, ax=ax)
    # Title the axes explicitly (the heatmap adds a colorbar axes, so the
    # "current axes" used by plt.title is not guaranteed to be `ax`), and do
    # it before tight_layout so the layout leaves room for it.
    ax.set_title(title)
    f.tight_layout()
    f.savefig(title)
In [107]:
# Display the column labels of the pset6 table.
pset6.columns
Out[107]:
In [108]:
# Render and save one plot per matrix; the second item is both the plot title
# and the output file name.
for cov_matrix, out_name in (
        (corr6, "pset6_corr"),
        (corr5, "pset5_corr"),
        (corr6e, "pset6e_corr"),
        (corr5e, "pset5e_corr")):
    make_corr_plot(cov_matrix, out_name)
In [109]:
def make_histogram(d, title="histogram", numBins=10):
    """Plot a histogram of the non-zero, numeric entries of ``d`` and save it.

    Parameters
    ----------
    d : pandas.Series or 1-D sequence
        Scores to plot.  Missing and non-numeric entries are dropped
        silently; entries equal to 0 are dropped and reported.
    title : str, optional
        Prefix of the axes title and stem of the output file
        (``title + ".png"``).
    numBins : int, optional
        Number of histogram bins passed to ``Axes.hist``.
    """
    # Coerce once instead of float()-ing element by element; unparseable
    # entries become NaN and are dropped together with the missing ones.
    scores = pd.to_numeric(pd.Series(d), errors='coerce').dropna()
    nonzero = scores[scores != 0].astype(float)

    # One message per removed zero, as before.
    for _ in range(len(scores) - len(nonzero)):
        print("Removed grade")

    fig = plt.figure()
    ax = fig.add_subplot(111)
    # Skip the plot call when nothing is left; the (empty) figure is still saved.
    if len(nonzero) > 0:
        ax.hist(nonzero.values, numBins, alpha=0.8)
    # A separating space fixes titles such as "pset6_histhistogram".
    ax.set_title(title + " histogram")
    fig.savefig(title + ".png")
In [110]:
# One histogram per score table: (table, score column, output stem, bin count).
for frame, column, out_name, bins in (
        (pset6, 'Problem Set 6 Final Score', 'pset6_hist', 10),
        (pset6e, 'Problem Set 6 Final Score', 'pset6e_hist', 5),
        (pset5e, 'Problem Set 5 Final Score', 'pset5e_hist', 5),
        (pset5, 'Problem Set 5 Final Score', 'pset5_hist', 10)):
    make_histogram(frame[column], out_name, bins)
In [111]:
def stats(pset):
    """Print the mean, population standard deviation and median of ``pset``.

    Parameters
    ----------
    pset : pandas.Series or array-like of numbers
        Scores to summarise.  NaN entries are ignored for all three
        statistics, so the median no longer comes out as NaN when the mean
        and standard deviation are finite.

    Prints a single line and returns ``None``.
    """
    scores = np.asarray(pset, dtype=float).ravel()
    scores = scores[~np.isnan(scores)]
    if scores.size == 0:
        print("no scores available")
        return
    # print(...) with one argument behaves the same on Python 2 and Python 3.
    print("{:.2f} (standard deviation {:.2f}) and median {:.2f}".format(
        np.mean(scores), np.std(scores), np.median(scores)))
In [115]:
# Print mean, standard deviation and median of the 'Problem Set 6 Final Score'
# column of the pset6e table.
stats(pset6e['Problem Set 6 Final Score'])
In [ ]: