In [168]:
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
import re
import time
plt.style.use('ggplot')
%matplotlib inline
Worapol B. and hamuel.me — some rights reserved (maybe, hahaha).
For the MUIC Math Club and MUIC students who want to use this as a reference.
From the data shown below, we use only the "master" section of each subject, and we take the number of students from "Registered" rather than the actual registered count, because "Registered" includes both the master and joint sections; this eliminates duplicate sections. We also remove subjects that do not specify a date and time.
In [173]:
# Read the section listing and keep only the rows whose 'Type' is 'master'.
df = pd.read_csv('t2_2016.csv').loc[lambda sections: sections['Type'] == 'master']
# Show the first rows as the cell output.
df.head()
Out[173]:
In [180]:
# Output format per session: [Day, start_time, end_time]
def time_extract(s):
    """Parse a 'Room/Time' cell into its day / start / end fields.

    The cell text is stripped and split on a run of 16 spaces; each piece
    is treated as one session. A session is split on single spaces and
    read as ``<room> <day> <start> <separator> <end>`` where the first
    character of the day token and the last character of the end token
    are dropped (e.g. ``"1210 (Mon 08:00 - 09:50)"``). If that reading
    fails, the same text is re-read without a leading room token.

    Parameters
    ----------
    s : object
        Cell value; it is converted with ``str()`` before parsing.

    Returns
    -------
    list or tuple
        ``[day, start, end]`` for a single session, or a 2-tuple
        ``(first, second)`` of such lists when the cell holds two or more
        16-space-separated pieces (only the first two are parsed).

    Raises
    ------
    IndexError
        If a session has too few space-separated tokens to be parsed
        either with or without a leading room token.
    """
    def fields_from(tokens):
        # Drop the separator token at index 2, then trim one wrapping
        # character from the day (front) and from the end time (back).
        fields = tokens[:2] + tokens[3:]
        fields[0] = fields[0][1:]
        fields[2] = fields[2][:-1]
        return fields

    def helper(session):
        tokens = session.strip().split(" ")
        try:
            # Usual layout: the first token is the room, so skip it.
            return fields_from(tokens[1:])
        except IndexError:
            # Too few tokens once the room is skipped: assume there is no
            # room token. Only IndexError is caught so that unrelated
            # exceptions (e.g. KeyboardInterrupt) are not swallowed.
            return fields_from(tokens)

    sessions = str(s).strip().split(" " * 16)
    top = helper(sessions[0])
    if len(sessions) > 1:
        return top, helper(sessions[1])
    return top
# df.iloc[791]
# time_extract(df['Room/Time'][791])
# Keep only the non-null 'Room/Time' values; the row labels of df are preserved.
tdf = df['Room/Time'].dropna()
# Preview the parsed form of the first ten entries.
tdf.apply(time_extract)[:10]
Out[180]:
Here we want to generate a histogram that is in the following format
[t1 , t2, ..., tn]
Here t1 could be the time slot from 8 to 9, and so on. The following is the logic for putting each subject into the correct time slots; taking a class that runs from 8 to 10 as the example, we round the 50 minutes of its end time up to 60.
We aim to plot a histogram from Monday to Friday
In [175]:
def normalize_time(t):
    """Convert an ``"HH:MM"`` string to whole-hour minutes since midnight.

    The hour field is multiplied by 60. A minute field equal to 50 is
    rounded up to a full hour (adds 60); any other minute value is
    ignored (adds 0).

    Parameters
    ----------
    t : str
        Time text whose first two ``:``-separated fields are integers.

    Returns
    -------
    int
        Minutes since midnight, always a multiple of 60.
    """
    parts = t.split(":")
    total = int(parts[0]) * 60
    # Only the ":50" case is rounded up to the next hour.
    if int(parts[1]) == 50:
        total += 60
    return int(total)
In [189]:
def gen_hist(day):
    """Plot how many registered students are in class per hour on ``day``.

    Uses the notebook-level ``tdf`` (non-null 'Room/Time' values) and
    ``df`` (for the 'Registered' count of each row). Every session whose
    parsed day equals ``day`` contributes one sample per registered
    student for each hour mark from its normalized start to its
    normalized end, inclusive. The samples are drawn with ``plt.hist``
    on the current pyplot figure; nothing is returned.

    Parameters
    ----------
    day : str
        Day label as produced by ``time_extract`` (e.g. ``'Mon'``).
    """
    # time_extract yields either a 2-tuple of sessions or a single
    # [day, start, end] list; treat both as a sequence of sessions.
    sessions_on_day = [
        (row_label, session)
        for row_label, parsed in zip(tdf.index, tdf.apply(time_extract))
        for session in (parsed if len(parsed) == 2 else [parsed])
        if session[0] == day
    ]

    hour_samples = []
    for row_label, session in sessions_on_day:
        first_mark = normalize_time(session[1])
        last_mark = normalize_time(session[2])
        # NOTE(review): the end mark itself is included, so a class ending
        # at 10:00 also counts toward the 10 o'clock slot -- confirm this
        # is intended.
        for minute_mark in range(first_mark, last_mark + 1, 60):
            hour_samples.extend(
                minute_mark / 60 for _ in range(df['Registered'][row_label])
            )

    plt.title("Student studying on " + day)
    plt.ylabel("Frequency")
    plt.xlabel("Time in hours")
    plt.hist(hour_samples, bins=11)
# Histogram for Monday.
gen_hist('Mon')
In [190]:
# Histogram for Tuesday.
gen_hist('Tue')
In [191]:
# Histogram for Wednesday.
gen_hist('Wed')
In [192]:
# Histogram for Thursday.
gen_hist('Thu')
In [193]:
# Histogram for Friday.
gen_hist('Fri')
In [194]:
# Histogram for Saturday (in addition to the Monday-Friday plots).
gen_hist('Sat')
In [ ]: