In [12]:
from pml.api import *
import pandas as pd
import matplotlib.pyplot as plt

In [2]:
# Read the dataset and discard empty samples in one step, keeping the
# returned dataset as `data`.
data = load("../dataset_ext2.csv").drop_empty_samples()
# Called for its effect on `data`: the result is not reassigned and the cell
# shows no output, so this presumably fills missing values in place with the
# per-feature mean -- TODO confirm against the pml API.
data.fill_missing_with_feature_means()

In [3]:
# Show how many samples carry each label; the bare expression is rendered as
# the cell's output.
data.get_label_value_counts()


Out[3]:
f    30
s    26
p    16

In [13]:
# Run PCA on the cleaned dataset (the second argument is presumably the number
# of components to keep -- TODO confirm), then label column 0 of the weight
# matrix with the feature names so each loading can be read by feature.
princomp = pca(data, 1)
# NOTE(review): `_original_features` is a private attribute of the PCA result;
# switch to a public accessor if pml exposes one.
signed_weights = pd.Series(
    princomp.weights[:, 0],
    index=princomp._original_features,
)

In [15]:
# List the first-component weights from most positive to most negative.
# `Series.sort_values` replaces `Series.order`, which pandas deprecated in 0.17
# and removed in 0.20. The single-argument, parenthesised print produces the
# same text on Python 2 and also parses on Python 3.
print(signed_weights.sort_values(ascending=False))


ENGR120    0.002989
ENGR110    0.002226
ENGL135   -0.089390
CHEM150   -0.242886
MATH101   -0.260933
ELEC199   -0.268938
MATH100   -0.271468
PHYS125   -0.287089
PHYS122   -0.288521
MECH141   -0.320300
CSC115    -0.335191
CSC111    -0.339158
MATH110   -0.472499

In [17]:
# Extremes of the signed weights; the normalisation cell below reuses both.
min_weight, max_weight = signed_weights.min(), signed_weights.max()
# One pre-formatted string keeps the "<min> <max>" output of the original
# two-item print while remaining a single print argument.
print("%s %s" % (min_weight, max_weight))


-0.472498927759 0.00298931311672

In [21]:
# Min-max scale the signed weights onto [0, 1]: the most negative weight maps
# to 0 and the most positive to 1. Relies on `min_weight` / `max_weight` from
# the preceding cell.
# NOTE(review): the sign of a principal component is arbitrary, so if these
# values are meant as feature-importance scores, scaling the absolute weights
# may be what is intended -- confirm before interpreting the ranking.
normalized_weights = ((signed_weights - min_weight)
                      / (max_weight - min_weight))
# `Series.sort_values` replaces `Series.order` (deprecated in pandas 0.17,
# removed in 0.20); print(...) with one argument behaves the same on Python 2.
print(normalized_weights.sort_values(ascending=False))


ENGR120    1.000000
ENGR110    0.998394
ENGL135    0.805716
CHEM150    0.482898
MATH101    0.444945
ELEC199    0.428110
MATH100    0.422787
PHYS125    0.389935
PHYS122    0.386925
MECH141    0.320089
CSC115     0.288773
CSC111     0.280429
MATH110    0.000000

In [ ]: