In [168]:
import pandas as pd 
import numpy as np
from matplotlib import pyplot as plt 
import re
import time
plt.style.use('ggplot')
%matplotlib inline

Worapol B. and hamuel.me reserve some rights, maybe, hahaha.

For the MUIC Math Club and MUIC students who want to use this as a reference.

Import as DF

From the data shown below we use only the "master" sections, and we take the number of students from the "Registered" column rather than "Actual Registered", because "Registered" includes both the master and the joint sections. This eliminates duplicate sections. We also remove subjects that do not specify a date and time.


In [173]:
# Read the section list and keep only the rows whose Type is 'master'.
df = pd.read_csv('t2_2016.csv').loc[lambda sections: sections['Type'] == 'master']
df.head()


Out[173]:
Division Subject Section Type Instructor Capacity Actual Registered Registered Seat Available Room/Time Final
0 SCI EGCI213 Programming Paradigms 4(4-0-8) 1 master Rangsipan MARUKATAT 20 9 9 11 1408 (Tue 14:00 - 15:50) 1408 ... [Room-TBA]
1 SS ICSO303 Modern Social Theory 4(4-0-8) 1 master Hardina OHLENDORF 40 17 17 23 3304 (Tue 16:00 - 17:50) 3304 ... [Room-TBA]
2 BBA ICIS370 Web Programming 4(4-0-8) 1 master Chaivatna SUMETPHONG 40 6 6 34 1514/1 (Fri 12:00 - 13:50) 151... [Room-TBA]
3 SCI ICBI384 Field Study on Ecology and Biodiversit... 1 master Wayne Nicholas PHILLIPS 25 26 26 -1 NaN [Room-TBA]
6 SS ICSO401 Independent Study in the Social Scienc... 1 master Nigel Gould Davies 5 1 1 4 NaN [Room-TBA]

In [180]:
#format [Day, start_time, end_time]
def time_extract(s):
    """Extract the meeting slot(s) from a ``Room/Time`` cell.

    A cell looks like ``"1408 (Tue 14:00 - 15:50)"`` and may contain a second
    slot after a run of spaces. The room label in front of the parentheses is
    optional and is ignored, so labels containing spaces are handled too.

    Parameters
    ----------
    s : object
        The cell value; it is converted with ``str`` before parsing.

    Returns
    -------
    list or tuple
        ``[day, start, end]`` (all strings) when one slot is found, or a tuple
        ``([day, start, end], [day, start, end])`` holding the first two slots
        when several are found. Slots beyond the second are ignored so that the
        return shape stays what existing callers expect.

    Raises
    ------
    ValueError
        If no ``(Day HH:MM - HH:MM)`` group can be found in the value.
    """
    # One parenthesised group per meeting: "(Tue 14:00 - 15:50)".
    slot_pattern = r"\(\s*([A-Za-z]+)\s+(\d{1,2}:\d{2})\s*-\s*(\d{1,2}:\d{2})\s*\)"
    slots = [list(found) for found in re.findall(slot_pattern, str(s))]

    if not slots:
        raise ValueError("no '(Day HH:MM - HH:MM)' slot found in %r" % (s,))
    if len(slots) == 1:
        return slots[0]
    return slots[0], slots[1]

# df.iloc[791]
# time_extract(df['Room/Time'][791])

# Room/Time strings of the sections that have a schedule (missing values dropped).
tdf = df['Room/Time'].dropna()

# Preview the parsed slots for the first ten scheduled sections.
tdf.apply(time_extract).head(10)


Out[180]:
0     ([Tue, 14:00, 15:50], [Thu, 14:00, 15:50])
1     ([Tue, 16:00, 17:50], [Thu, 16:00, 17:50])
2     ([Fri, 12:00, 13:50], [Fri, 14:00, 15:50])
9     ([Tue, 14:00, 15:50], [Thu, 14:00, 15:50])
16                           [Mon, 08:00, 12:00]
17    ([Mon, 10:00, 11:50], [Wed, 10:00, 11:50])
18    ([Tue, 14:00, 15:50], [Thu, 14:00, 15:50])
19    ([Mon, 14:00, 15:50], [Wed, 14:00, 15:50])
20    ([Tue, 12:00, 13:50], [Thu, 12:00, 13:50])
21                           [Fri, 08:00, 11:50]
Name: Room/Time, dtype: object

Here we want to generate a histogram that is in the following format

[t1 , t2, ..., tn]

Here t1 could be the time slot from 8 to 9. The logic for putting a subject into the correct time slots is as follows: taking a class listed as 8:00 - 9:50 as an example, we round the 50 up to 60, so the class is treated as running from 8 to 10.

We aim to plot a histogram from Monday to Friday


In [175]:
def normalize_time(t):
    """Turn an ``"HH:MM"`` string into whole-hour minutes since midnight.

    A minute value of exactly 50 is rounded up to the next full hour; any
    other minute value is discarded.
    """
    pieces = t.split(":")
    total = 60 * int(pieces[0])
    if int(pieces[1]) == 50:
        total += 60
    return total

Histogram for Monday

Monday: frequency of people in classes


In [189]:
def gen_hist(day, bins=None):
    """Plot how many registered students are in class during each hour of ``day``.

    Reads the module-level ``tdf`` (Room/Time strings) and ``df`` (for the
    ``Registered`` count of each row), parses every slot with ``time_extract``
    and converts its start/end with ``normalize_time``.

    Parameters
    ----------
    day : str
        Day label as it appears in the parsed slots, e.g. ``'Mon'``.
    bins : int or sequence, optional
        Passed to ``plt.hist``. By default one unit-wide bin per hour is used,
        so the bar for hour ``h`` spans ``h`` to ``h + 1``.

    Returns
    -------
    None
        The histogram is drawn on the current matplotlib figure.
    """
    # Collect (row label, [day, start, end]) for every slot that falls on ``day``.
    slots_on_day = []
    for row, parsed in zip(tdf.index, tdf.apply(time_extract)):
        # time_extract gives a bare [day, start, end] list for one slot and a
        # tuple of such lists for several; treat both as a sequence of slots.
        slots = parsed if isinstance(parsed, tuple) else (parsed,)
        for slot in slots:
            if slot[0] == day:
                slots_on_day.append((row, slot))

    hours = []     # starting hour of each occupied one-hour slot
    students = []  # registered students attending during that slot
    for row, slot in slots_on_day:
        start = normalize_time(slot[1])
        end = normalize_time(slot[2])
        # A slot that rounds to zero length still occupies its starting hour.
        end = max(end, start + 60)
        registered = df.at[row, 'Registered']
        # The end hour is exclusive: a 14:00 - 15:50 class fills hours 14 and
        # 15 only. Counting hour 16 as well would double count students who
        # have back-to-back classes.
        for minute in range(start, end, 60):
            hours.append(minute // 60)
            students.append(registered)

    plt.title("Student studying on " + day)
    plt.ylabel("Frequency")
    plt.xlabel("Time in hours")

    if bins is None:
        # Hour-aligned edges; fall back to a plain bin count when there is
        # nothing to plot for this day.
        bins = list(range(min(hours), max(hours) + 2)) if hours else 11
    # Weight each hour by its head count instead of repeating it per student.
    plt.hist(hours, bins=bins, weights=students if students else None)

# Draw the histogram for Monday's classes.
gen_hist('Mon')


Histogram for Tuesday


In [190]:
# Draw the histogram for Tuesday's classes.
gen_hist('Tue')



In [191]:
# Draw the histogram for Wednesday's classes.
gen_hist('Wed')



In [192]:
# Draw the histogram for Thursday's classes.
gen_hist('Thu')



In [193]:
# Draw the histogram for Friday's classes.
gen_hist('Fri')



In [194]:
# Draw the histogram for Saturday's classes.
gen_hist('Sat')



In [ ]: