Comparison of grades and assignments for the DTU course 'Data Mining using Python' (02819).
Two files from CampusNet should be downloaded to a specific directory.
Finn Årup Nielsen, http://www.compute.dtu.dk/~faan/
In [1]:
from os.path import join, expanduser
from lxml import etree
import matplotlib.pyplot as plt
import pandas as pd
In [2]:
# Semester identifier. It is also used as the name of the per-semester
# sub-directory from which the data files are read.
# NOTE(review): 'E13' presumably denotes the autumn 2013 run of the course - confirm.
semester = 'E13'
In [3]:
# The two input files are expected at <directory>/<semester>/<file name>,
# so they must be saved in exactly this directory structure before running.
directory = expanduser('~/data/dtu02819')

# Build both paths the same way: first the grade page (HTML), then the
# assignment results (Excel workbook).
filename_grades, filename_assignment = (
    join(directory, semester, basename)
    for basename in (
        'Karakterindberetning - Danmarks Tekniske Universitet.html',
        'Resultater.xlsx',
    )
)
In [4]:
# Read the saved HTML page with grades.
# The context manager closes the file handle as soon as the content is read,
# instead of leaving it open until garbage collection.
# NOTE(review): the file is decoded with the platform default encoding -
# confirm that this matches the encoding of the saved page.
with open(filename_grades) as grades_file:
    tree = etree.HTML(grades_file.read())

# The grades are taken from the first table with class 'deltagerliste'.
grade_tables = tree.xpath("//table[@class='deltagerliste']")
if not grade_tables:
    # Same exception type as the bare `[0]` lookup would raise, but with a
    # message that points at the actual problem.
    raise IndexError(
        "No table with class 'deltagerliste' found in %r" % (filename_grades,))
table_element = grade_tables[0]
elements = table_element.xpath(".//tr")

# One dict per table row. element.iter() yields the <tr> element itself
# first, so [1:] drops it and keeps the text of its descendant nodes; zip()
# then pairs the first three of them with the column names.
# NOTE(review): presumably these are the user id, name and grade cells -
# verify against the saved page (a header row, if any, is included as well).
grades_dict = [
    dict(zip(['Bruger', 'Name', 'Grade'],
             [node.text for node in element.iter()][1:]))
    for element in elements
]
grades = pd.DataFrame(grades_dict)
In [5]:
# Load the 'Resultater' sheet of the Excel workbook downloaded from
# 'Assignments' on CampusNet, skipping the first three rows of the sheet.
assignment = pd.read_excel(
    filename_assignment,
    'Resultater',
    skiprows=3,
)
In [6]:
# Combine grades and assignment results into one table, matching rows on the
# shared 'Bruger' column (the study number).
data = grades.merge(assignment, on='Bruger')
In [7]:
# Convert the string grade to numeric.
#
# 'EM' is a non-numeric grade code; it is mapped to -2 so that the whole
# column can be cast to int.
# NOTE(review): 'EM' presumably marks a student who did not attend - confirm.
#
# Implementation notes:
# * The `.ix` indexer used previously was removed in pandas 1.0, so the
#   substitution is done with Series.mask instead.
# * The comparison is made on a string view of the column. This works whether
#   the column still holds strings or was already converted to int, so the cell
#   can be re-run safely and no longer needs a silent `except TypeError: pass`.
grade_text = data['Grade'].astype(str)
data['Grade'] = grade_text.mask(grade_text == 'EM', '-2').astype(int)
In [8]:
%matplotlib inline
data.plot(x='Score', y='Grade', kind='scatter')
plt.show()
In [ ]: