notebook.community

Edit and run



In [12]:

    
from pml.api import *
import pandas as pd
import matplotlib.pyplot as plt



In [2]:

    
# Read the dataset and discard samples that are entirely empty in one step.
data = load("../dataset_ext2.csv").drop_empty_samples()

# Replace remaining missing values with the per-feature mean.
# NOTE(review): the return value is not captured, so this presumably
# mutates `data` in place -- confirm against the pml API.
data.fill_missing_with_feature_means()



In [3]:

    
# Display how many samples carry each label value (the cell's last
# expression is rendered as its output).
data.get_label_value_counts()









    Out[3]:





f    30
s    26
p    16



In [13]:

    
# Reduce the dataset to a single principal component, then label the
# first column of its weight matrix with the feature names.
# NOTE(review): `_original_features` is a private attribute of the PCA
# result; prefer a public accessor if pml provides one.
princomp = pca(data, 1)
signed_weights = pd.Series(
    data=princomp.weights[:, 0],
    index=princomp._original_features,
)



In [15]:

    
# Show the first-component weights from largest to smallest.
# `Series.order` no longer exists in current pandas; `sort_values` is its
# replacement. The parenthesised print runs on both Python 2 and Python 3.
print(signed_weights.sort_values(ascending=False))









    



ENGR120    0.002989
ENGR110    0.002226
ENGL135   -0.089390
CHEM150   -0.242886
MATH101   -0.260933
ELEC199   -0.268938
MATH100   -0.271468
PHYS125   -0.287089
PHYS122   -0.288521
MECH141   -0.320300
CSC115    -0.335191
CSC111    -0.339158
MATH110   -0.472499



In [17]:

    
# Extremes of the signed weights; they define the range used when the
# weights are rescaled later in the notebook.
min_weight = signed_weights.min()
max_weight = signed_weights.max()
# Formatting into one string prints "min max" on Python 2 and Python 3
# alike (a bare `print a, b` statement is Python-2-only syntax).
print("%s %s" % (min_weight, max_weight))









    



-0.472498927759 0.00298931311672



In [21]:

    
# Min-max rescale the signed weights onto [0, 1]: the smallest (most
# negative) weight maps to 0 and the largest maps to 1.
# NOTE(review): the sign of a principal component is arbitrary, so this
# ordering reverses if the component is negated; if the goal is feature
# importance, ranking by absolute weight may be what is wanted -- confirm.
# NOTE(review): when max_weight == min_weight the division is 0/0 and
# every normalized weight becomes NaN.
normalized_weights = ((signed_weights - min_weight)
                      / (max_weight - min_weight))
# `Series.order` no longer exists in current pandas; `sort_values` is its
# replacement. The parenthesised print runs on both Python 2 and Python 3.
print(normalized_weights.sort_values(ascending=False))









    



ENGR120    1.000000
ENGR110    0.998394
ENGL135    0.805716
CHEM150    0.482898
MATH101    0.444945
ELEC199    0.428110
MATH100    0.422787
PHYS125    0.389935
PHYS122    0.386925
MECH141    0.320089
CSC115     0.288773
CSC111     0.280429
MATH110    0.000000



In [ ]: