In [1]:
import pandas as pd
import numpy as np

from matplotlib import pylab as plt

In [2]:
# Load the gradebook CSV into `grades`; the path is relative to the working directory.
# getpset() below reads this frame and expects it to have a `Section` column.
gradefile = "cs124.csv"
grades = pd.read_csv(gradefile)

In [47]:
# Count the rows whose Section is anything other than the main course section.
outside_main_section = grades.Section != 'COLGSAS COMPSCI 124'
len(grades[outside_main_section])


Out[47]:
47

In [101]:
def getpset(pset, extension=False, data=None):
    """Select the grade columns of one problem set for one cohort.

    Parameters
    ----------
    pset : str
        Regular expression matched against the column names
        (passed to ``DataFrame.filter(regex=...)``).
    extension : bool, optional
        False (default) keeps the rows whose ``Section`` equals
        'COLGSAS COMPSCI 124'; True keeps every other row and additionally
        drops rows whose last selected column equals 0 (treated as students
        who dropped the class).
    data : pandas.DataFrame, optional
        Gradebook to select from. Defaults to the module-level ``grades``.

    Returns
    -------
    pandas.DataFrame
        The matching columns, with any parenthesised suffix removed from the
        column names and every value coerced to a number (unparseable
        entries become NaN).

    Raises
    ------
    AssertionError
        If the section filter kept every row of the gradebook.
    """
    import re

    source = grades if data is None else data

    if extension:
        subset = source[source.Section != 'COLGSAS COMPSCI 124']
    else:
        subset = source[source.Section == 'COLGSAS COMPSCI 124']

    # Sanity check: the section filter is expected to remove at least one row.
    assert len(subset) != len(source), "section filter did not remove any rows"

    # Make pretty: keep only this problem set's columns and strip the
    # parenthesised suffix from their names. Whitespace that preceded the
    # parenthesis is intentionally left in place so existing labels keep working.
    subset = subset.filter(regex=pset)
    subset.columns = [re.sub(r'\([^)]*\)', '', name) for name in subset.columns]

    # DataFrame.convert_objects(convert_numeric=True) no longer exists in
    # pandas; pd.to_numeric with errors='coerce' is its replacement.
    subset = subset.apply(pd.to_numeric, errors='coerce')

    # Extension students who scored 0 in the last selected column are assumed
    # to have dropped the class. Select that column by position so duplicate
    # labels cannot turn the mask into a DataFrame.
    if extension:
        subset = subset[subset.iloc[:, -1] != 0]

    return subset

In [102]:
# For each problem set build the on-campus frame first, then the extension frame.
pset5, pset5e = [getpset("Problem Set 5|Pset 5", is_extension) for is_extension in (False, True)]
pset6, pset6e = [getpset("Problem Set 6|Pset 6", is_extension) for is_extension in (False, True)]

In [104]:
# Number of extension-cohort rows kept for problem set 6, i.e. after getpset()
# dropped the rows whose last selected column was 0.
len(pset6e)


Out[104]:
22

In [105]:
# Covariance matrices of the first five selected columns of each frame.
# NOTE: despite the `corr*` names these hold covariances (`.cov()`), not
# correlations. For pset6 the first five columns are the per-problem scores;
# presumably the same holds for pset5 -- confirm against its columns.
# `.ix` no longer exists in pandas; `.iloc` is the positional equivalent here.
corr6 = pset6.iloc[:, :5].cov()
corr5 = pset5.iloc[:, :5].cov()
corr5e = pset5e.iloc[:, :5].cov()
corr6e = pset6e.iloc[:, :5].cov()
corr6.head()


Out[105]:
Pset 6, Problem 1 Pset 6, Problem 2 Pset 6, Problem 3 Pset 6, Problem 4 Pset 6, Problem 5
Pset 6, Problem 1 4.972419 3.296868 1.830475 2.211774 1.876208
Pset 6, Problem 2 3.296868 12.238999 4.101189 4.868988 2.120012
Pset 6, Problem 3 1.830475 4.101189 5.559646 2.368988 2.183283
Pset 6, Problem 4 2.211774 4.868988 2.368988 6.129353 2.017474
Pset 6, Problem 5 1.876208 2.120012 2.183283 2.017474 8.415096

In [106]:
%matplotlib
import seaborn as sns

def make_corr_plot(d, title="plot"):
    """Plot the pairwise correlations between the columns of ``d`` and save the figure.

    Parameters
    ----------
    d : pandas.DataFrame or array-like
        Observations with one variable per column. The correlation matrix is
        computed here, so pass the raw scores rather than a precomputed
        correlation or covariance matrix.
    title : str, optional
        Used both as the plot title and as the output file name (matplotlib
        appends its default extension when the name has none).
    """
    # sns.corrplot has been removed from seaborn; it computed the correlation
    # matrix of its input and drew it, which heatmap() reproduces explicitly.
    corrmat = pd.DataFrame(d).corr()

    # Hide the redundant upper triangle of the symmetric matrix.
    upper_triangle = np.triu(np.ones(corrmat.shape, dtype=bool), k=1)

    f, ax = plt.subplots(figsize=(9, 9))
    cmap = sns.diverging_palette(220, 10, as_cmap=True)
    # Fix the colour range to [-1, 1] so that 0 sits at the palette's centre.
    sns.heatmap(corrmat, mask=upper_triangle, cmap=cmap, vmin=-1, vmax=1,
                square=True, ax=ax)
    # Title the Axes before tight_layout() so the layout leaves room for it.
    ax.set_title(title)
    f.tight_layout()
    f.savefig(title)


Using matplotlib backend: agg

In [107]:
# Inspect the labels getpset() selected for problem set 6. Labels that had a
# parenthesised suffix keep the whitespace that preceded it.
pset6.columns


Out[107]:
Index([u'Pset 6, Problem 1 ', u'Pset 6, Problem 2 ', u'Pset 6, Problem 3 ', u'Pset 6, Problem 4 ', u'Pset 6, Problem 5 ', u'Problem Set 6 Current Score', u'Problem Set 6 Final Score'], dtype='object')

In [108]:
# make_corr_plot() computes the correlations of the columns it is given, so it
# must receive the raw scores; handing it a covariance matrix would plot the
# correlations between covariance columns instead.
# The first five selected columns are taken as the per-problem scores.
for frame, name in [(pset6, "pset6_corr"), (pset5, "pset5_corr"),
                    (pset6e, "pset6e_corr"), (pset5e, "pset5e_corr")]:
    make_corr_plot(frame.iloc[:, :5], name)

In [109]:
def make_histogram(d, title="histogram", numBins=10):
    """Plot a histogram of the non-zero numeric grades in ``d`` and save it as a PNG.

    Parameters
    ----------
    d : pandas.Series
        Grades. Missing values are dropped, zeros are excluded (one
        "Removed grade" line is printed per zero) and entries that cannot be
        converted to a number are skipped and reported.
    title : str, optional
        Prefix of the plot title and base name of the output file
        (``title + ".png"``).
    numBins : int, optional
        Number of histogram bins.
    """
    nd = []
    skipped = 0
    for el in d.dropna():
        # Convert before comparing so that a textual "0" is also treated as zero.
        try:
            value = float(el)
        except (TypeError, ValueError):
            skipped += 1
            continue
        if value == 0:
            print("Removed grade")
        else:
            nd.append(value)
    if skipped:
        # Report rather than silently ignore entries that are not numbers.
        print("Skipped {} non-numeric grade(s)".format(skipped))

    fig = plt.figure()
    ax = fig.add_subplot(111)
    ax.hist(nd, numBins, alpha=0.8)
    # Separate the prefix from the word "histogram" in the rendered title.
    ax.set_title(title + " histogram")

    fig.savefig(title + ".png")

In [110]:
# One histogram of the final score per cohort: (frame, score column, file stem, bins).
histogram_jobs = [
    (pset6, 'Problem Set 6 Final Score', 'pset6_hist', 10),
    (pset6e, 'Problem Set 6 Final Score', 'pset6e_hist', 5),
    (pset5e, 'Problem Set 5 Final Score', 'pset5e_hist', 5),
    (pset5, 'Problem Set 5 Final Score', 'pset5_hist', 10),
]
for frame, column, stem, bins in histogram_jobs:
    make_histogram(frame[column], stem, bins)

In [111]:
# Summary statistics for any collection of scores (either cohort).
def stats(pset):
    """Print the mean, standard deviation and median of ``pset``.

    Parameters
    ----------
    pset : array-like of numbers
        Scores. NaN entries are removed first so that all three statistics
        are computed over the same values.

    The standard deviation is the population one (numpy's default, ddof=0).
    Nothing is returned; the summary is printed as a single line.
    """
    values = np.asarray(pset, dtype=float)
    values = values[~np.isnan(values)]
    print("{:.2f} (standard deviation {:.2f}) and median {:.2f}".format(
        np.mean(values), np.std(values), np.median(values)))

In [115]:
# Summary statistics of the final Problem Set 6 score for the extension cohort.
stats(pset6e['Problem Set 6 Final Score'])


67.03 (standard deviation 20.65) and median 76.75

In [ ]: