In [11]:
#The usual imports
from __future__ import division
import math
from collections import OrderedDict
from pandas import read_csv
import numpy as np
from pymatgen.util.plotting_utils import get_publication_quality_plot
from monty.string import remove_non_ascii
import prettyplotlib as ppl
from prettyplotlib import brewer2mpl
import matplotlib.pyplot as plt
# Qualitative "Set1" palette with 8 entries, taken in matplotlib color form;
# bar_plot draws its bars with colors[0].
colors = brewer2mpl.get_map('Set1', 'qualitative', 8).mpl_colors
%matplotlib inline
In [12]:
# Lower cutoff of every letter grade, given as a number of standard deviations
# away from the mean score. Entries run from the highest grade to the lowest,
# and that insertion order is the order in which bar_plot walks the bands.
# Disabled alternatives kept for reference: "A+" at 1.5 and "A" at 1.
grade_cutoffs = OrderedDict([
    ("A", 0.75),
    ("B+", 0.5),
    ("B", -0.25),
    ("B-", -0.5),
    ("C+", -0.75),
    ("C", -1),
    ("C-", -2),
    # No lower limit: F takes whatever is left below C-.
    ("F", float("-inf")),
])
Load the data from the CSV file exported from the Ted Full Grade Center. Some sanitization is performed on the column names to remove non-ASCII characters and other cruft.
In [13]:
def _percent_to_float(value):
    """Convert one "Weighted Total" cell, e.g. "85.5%", to a float.

    Values that are not strings (a NaN from a blank cell, or a number that
    was already parsed) are passed straight to float(), and an empty string
    becomes NaN, so one missing total no longer aborts the whole load.
    """
    if hasattr(value, "strip"):
        value = value.strip().strip("%")
        if not value:
            return float("nan")
    return float(value)


def load_data(filename):
    """
    Reads a grade-center CSV export into a DataFrame and cleans it up.

    Column names are stripped of non-ascii characters, cut at the first "["
    and stripped of surrounding whitespace and double quotes. The
    "Weighted Total" column is converted from percentage strings to floats;
    missing entries become NaN.

    Args:
        filename: Path of the CSV file to read.

    Returns:
        The cleaned DataFrame. The cleaned column index is also printed.

    Raises:
        KeyError: if there is no "Weighted Total" column after cleaning.
        ValueError: if a "Weighted Total" entry is not a number.
    """
    d = read_csv(filename)
    d.columns = [
        remove_non_ascii(c).split("[")[0].strip().strip("\"")
        for c in d.columns
    ]
    d["Weighted Total"] = [_percent_to_float(i) for i in d["Weighted Total"]]
    print(d.columns)
    return d
In [14]:
# Gradebook export that every following cell works on.
d = load_data(filename="gc_CENG114_WI16_Ong_fullgc_2016-03-15-19-58-36.csv")
In [15]:
def bar_plot(dframe, data_key, offset=0):
    """
    Creates a histogram of the results, marks the grade cutoffs on it and
    saves the figure as "<title>.png".

    The number of valid scores and, for every grade, its band, head count
    and percentage are printed. Bands are counted as lower <= score < upper,
    except the top band, which has no upper limit so that scores of 100 or
    more are included.

    Args:
        dframe: DataFrame which is imported from CSV.
        data_key: Specific column to plot
        offset: Allows an offset for each grade. Defaults to 0. It is not
            applied to the grade whose cutoff is -inf, which is pinned at 0.

    Returns:
        dict of cutoffs, {grade: (lower, upper)}
    """
    # Local import on purpose: plt is rebound a few lines below, which makes
    # it a local name for the whole function body.
    import matplotlib.pyplot as plt

    data = dframe[data_key]
    scores = [x for x in data if not np.isnan(x)]
    N = len(scores)
    print(N)

    # 20 bins of width 5 over 0-100; drop the last edge to get left edges.
    heights, bin_edges = np.histogram(scores, bins=20, range=(0, 100))
    left_edges = list(bin_edges)[:-1]

    fig, ax = plt.subplots(1)
    ppl.bar(ax, left_edges, heights, width=5, color=colors[0], grid='y')
    plt = get_publication_quality_plot(12, 8, plt)
    plt.xlabel("Score")
    plt.ylabel("Number of students")

    mean = data.mean()
    sigma = data.std()
    maxy = np.max(heights)

    prev_cutoff = 100
    cutoffs = {}
    is_top_grade = True
    for grade, cutoff in grade_cutoffs.items():
        if cutoff == float("-inf"):
            cutoff = 0
        else:
            cutoff = max(0, mean + cutoff * sigma) + offset
        plt.plot([cutoff] * 2, [0, maxy], 'k--')
        plt.annotate("%.1f" % cutoff, [cutoff, maxy - 1], fontsize=18,
                     horizontalalignment='left', rotation=45)
        if is_top_grade:
            # No upper limit, otherwise a score of exactly 100 (or more)
            # would be missing from the reported distribution.
            n = sum(1 for s in scores if s >= cutoff)
        else:
            n = sum(1 for s in scores if cutoff <= s < prev_cutoff)
        # Guard the percentage against a column without any valid score.
        percent = n * 100.0 / N if N else 0.0
        print("Grade %s (%.1f-%.1f): %d (%.2f%%)"
              % (grade, cutoff, prev_cutoff, n, percent))
        plt.annotate(grade, [(cutoff + prev_cutoff) / 2, maxy], fontsize=18,
                     horizontalalignment='center')
        cutoffs[grade] = (cutoff, prev_cutoff)
        prev_cutoff = cutoff
        is_top_grade = False

    plt.ylim([0, maxy * 1.1])
    # Backslashes are doubled so "\mu" and "\sigma" are not invalid escapes.
    plt.annotate("$\\mu = %.1f$\n$\\sigma = %.1f$\n$max=%.1f$"
                 % (mean, sigma, data.max()), xy=(10, 7), fontsize=30)
    title = data_key.split("[")[0].strip()
    plt.title(title, fontsize=30)
    plt.tight_layout()
    plt.savefig("%s.png" % title)
    return cutoffs
In [16]:
# Draw one histogram per column whose name contains "PS", "Midterm" or
# "Final", skipping columns that hold nothing but NaN.
for c in d.columns:
    if not any(tag in c for tag in ("PS", "Midterm", "Final")):
        continue
    if np.isnan(d[c]).all():
        continue
    print(c)
    bar_plot(d, c)
In [17]:
# Grade bands for the overall score; every cutoff except the fixed 0 of the
# -inf grade is shifted down by 2 points.
cutoffs = bar_plot(dframe=d, data_key="Weighted Total", offset=-2)
In [18]:
# Show the {grade: (lower, upper)} bands that were just computed.
print(cutoffs)
In [19]:
def assign_grade(pts):
    """
    Maps a score to its letter grade using the module-level ``cutoffs``
    dict of {grade: (lower, upper)}.

    A score belongs to a band when lower <= pts < upper, i.e. a score that
    sits exactly on a cutoff earns the grade that cutoff belongs to. The band
    with the highest upper bound additionally accepts every score at or above
    that bound, so a perfect (or extra-credit) score still receives a grade.

    Args:
        pts: Numeric score.

    Returns:
        The grade label, or None when no band matches (e.g. NaN, or a score
        below every lower bound).
    """
    if not cutoffs:
        return None
    top = max(upper for _, upper in cutoffs.values())
    for g, (lower, upper) in cutoffs.items():
        if lower <= pts < upper:
            return g
        if upper == top and pts >= upper:
            return g
    return None
# To grade the revised gradebook instead, reload it first:
# d = load_data("gc_CENG114_WI16_Ong_fullgc_2016-03-21-15-47-06.csv")
# One letter grade per row, looked up from that row's weighted total.
d["Final_Assigned_Egrade"] = list(map(assign_grade, d["Weighted Total"]))
d.to_csv("Overall grades_OLD.csv")
print("Written!")
In [ ]:
In [ ]:
In [ ]: