Grade/assignment comparison

Comparison of grades and assignments for the DTU course 'Data Mining using Python' (02819).

Two files from CampusNet should be downloaded to a specific directory.

Author

Finn Årup Nielsen, http://www.compute.dtu.dk/~faan/


In [1]:
from os.path import join, expanduser
from lxml import etree
import matplotlib.pyplot as plt
import pandas as pd

In [2]:
# Semester identifier; used below as the sub-directory that holds the data files
semester = "E13"

In [3]:
# NOTE: the data files must be saved in this particular directory structure:
#   <directory>/<semester>/<file name>
directory = expanduser('~/data/dtu02819')
semester_directory = join(directory, semester)

# Saved HTML page with the reported grades
filename_grades = join(
    semester_directory,
    'Karakterindberetning - Danmarks Tekniske Universitet.html',
)
# Excel sheet with the assignment results
filename_assignment = join(semester_directory, 'Resultater.xlsx')

In [4]:
# Read the saved HTML grade report and turn its participant table into a
# DataFrame with the columns 'Bruger', 'Name' and 'Grade'.
# The file is opened in binary mode so that lxml determines the character
# encoding from the document instead of relying on the locale default, and
# the context manager makes sure the file handle is closed again.
with open(filename_grades, 'rb') as grades_file:
    tree = etree.HTML(grades_file.read())

# An IndexError here means no table with class 'deltagerliste' was found
table_element = tree.xpath("//table[@class='deltagerliste']")[0]
elements = table_element.xpath(".//tr")

# element.iter() yields the row element itself first, hence the [1:].
# The texts of the following nodes are paired with the three column names;
# zip() stops once the three names are used up.
grades_dict = [dict(zip(['Bruger', 'Name', 'Grade'],
                        [node.text for node in element.iter()][1:]))
               for element in elements]
grades = pd.DataFrame(grades_dict)

In [5]:
# Load the 'Resultater' sheet of the Excel file downloaded from 'Assignments'
# on CampusNet; the first three rows of the sheet are skipped.
assignment = pd.read_excel(
    filename_assignment,
    'Resultater',
    skiprows=3,
)

In [6]:
# Combine the grade table with the assignment results, matching rows on the
# shared 'Bruger' (study number) column
data = grades.merge(assignment, on='Bruger')

In [7]:
# Convert the string grades to integers.
# The non-numeric grade 'EM' is mapped to the placeholder value -2 first
# (presumably 'EM' marks a student who did not attend the exam - confirm).
# The replacement is done on an object-dtype copy of the column with a plain
# boolean mask: DataFrame.ix no longer exists in current pandas, and this form
# also works when the cell is re-run and the column is already numeric, so no
# exception has to be swallowed.
grade = data['Grade'].astype(object)
grade[grade == 'EM'] = -2
data['Grade'] = grade.astype(int)

In [8]:
%matplotlib inline
data.plot(x='Score', y='Grade', kind='scatter')
plt.show()



In [ ]: