notebook.community

Edit and run



In [1]:

    
from pml.api import *
import matplotlib.pyplot as plt



In [2]:

    
# Load the CSV and drop empty samples in one chained expression.
data = load("../dataset_ext2.csv").drop_empty_samples()
# Called as a bare statement: its return value is discarded, exactly as in
# the original cell.
# NOTE(review): this presumably fills missing values in place -- confirm
# against pml's DataSet API; if it returns a new object the result is lost.
data.fill_missing_with_feature_means()



In [3]:

    
# Show how many samples carry each label (rendered as the cell output).
data.get_label_value_counts()









    Out[3]:





f    30
s    26
p    16



In [4]:

    
# Get first principal component
# NOTE(review): the second argument (1) is presumably the number of
# components to keep -- confirm against pml's pca signature.
princomp = pca(data, 1)
# Last expression of the cell, so its result is rendered as the cell output:
# one impact value per feature.
princomp.get_first_component_impacts()









    Out[4]:





MATH110    0.472499
CSC111     0.339158
CSC115     0.335191
MECH141    0.320300
PHYS122    0.288521
PHYS125    0.287089
MATH100    0.271468
ELEC199    0.268938
MATH101    0.260933
CHEM150    0.242886
ENGL135    0.089390
ENGR120    0.002989
ENGR110    0.002226



In [5]:

    
# List the feature names of the PCA result; the output shows a single
# feature, 0, which is the column index used when plotting below.
princomp.feature_list()









    Out[5]:





[0]



In [14]:

    
# Shared plot styling. Both mappings are keyed by the label values that the
# plotting cells look up ("s", "p", "f").
markers = dict(s="o", p="^", f="x")   # matplotlib marker per label
colours = dict(s="g", p="y", f="r")   # matplotlib colour code per label

# Figure size handed to matplotlib: (width, height) in inches.
figsize = 15, 10



In [9]:

    
# Plot 1st PC Weights For Each Sample
# One scatter series per label, drawn on an explicit Axes so nothing depends
# on pyplot's implicit "current figure" state.
fig, ax = plt.subplots(figsize=figsize)
for label in data.get_label_set():
    # Restrict to the samples carrying this label.
    filtered = data.label_filter(label)

    xs = filtered.get_sample_ids()
    # Column 0 is the single feature of the one-component PCA result.
    ys = princomp.get_rows(xs).get_column(0)

    # label= registers the series with the legend; without a legend the
    # colour/marker encoding cannot be decoded from the saved image.
    ax.scatter(xs, ys, color=colours[label],
               marker=markers[label], label=label)

ax.set_xlabel("Sample Id")
ax.set_ylabel("First Principal Component")
ax.set_title("1st PC Weights For Each Sample")
ax.legend(title="Label")

fig.savefig("scatter_pca_weights.png")



In [11]:

    
# For each course, plot grades for each sample (student)
# One figure per feature of `data`, saved as scatter_<course>.png.
for course in data.feature_list():
    # A fresh figure/axes pair per course; drawing through `ax` keeps each
    # iteration independent of pyplot's current-figure state.
    fig, ax = plt.subplots(figsize=figsize)

    for label in data.get_label_set():
        # Restrict to the samples carrying this label.
        filtered = data.label_filter(label)

        xs = filtered.get_sample_ids()
        ys = filtered.get_rows(xs).get_column(course)

        # label= registers the series with the legend so the colour/marker
        # encoding is readable in the saved image.
        ax.scatter(xs, ys, color=colours[label],
                   marker=markers[label], label=label)

    ax.set_xlabel("Sample Id")
    ax.set_ylabel("%s Grade" % course)
    ax.set_title("%s Grade For Each Sample" % course)
    ax.legend(title="Label")

    # NOTE(review): figures are deliberately not closed here so notebook
    # display is unaffected; add plt.close(fig) if this runs as a script.
    fig.savefig("scatter_%s.png" % course)



In [ ]: