1. Import Data


In [114]:
import pandas as pd
import numpy as np
import pickle

In [115]:
df_file = pd.read_csv('../data/df_dropSub_less20_dropNaResult.csv', delimiter=",",
                      skip_blank_lines=True, error_bad_lines=False)
df_file = df_file.drop('Unnamed: 0', axis=1)   # drop the leftover index column
df_file = df_file.fillna(0)                    # 0 marks courses not yet taken
# Map letter/symbol grades onto an ordinal scale (A highest, U/U# lowest)
df_file = df_file.replace(['A', 'B+', 'B', 'C+', 'C', 'D+', 'D', 'F', 'W', 'S', 'S#', 'U', 'U#'],
                          [8, 7, 7, 6, 6, 5, 5, 4, 3, 2, 2, 1, 1])

In [116]:
df_file


Out[116]:
Unnamed: 0.1 3COURSEID 4RESULT 0STUDENTID 1ACADYEAR 2SEMESTER AT316 AT326 BA291 CJ315 ... TA395 TH161 TU100 TU110 TU120 TU122 TU130 TU154 PROVINCEID SCHOOLGPA
0 0 CS101 6 316644 2552 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 12 3.32
1 1 CS102 6 316644 2552 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 12 3.32
2 2 EL171 5 316644 2552 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 12 3.32
3 3 SC135 4 316644 2552 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 12 3.32
4 4 SC185 6 316644 2552 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 12 3.32
5 5 TH161 6 316644 2552 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 12 3.32
6 6 TU154 5 316644 2552 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 12 3.32
7 7 CS111 5 316644 2552 2 0 0 0 0 ... 0 6 0 0 0 0 0 5 12 3.32
8 8 EL172 4 316644 2552 2 0 0 0 0 ... 0 6 0 0 0 0 0 5 12 3.32
9 9 MA211 4 316644 2552 2 0 0 0 0 ... 0 6 0 0 0 0 0 5 12 3.32
10 10 PY228 7 316644 2552 2 0 0 0 0 ... 0 6 0 0 0 0 0 5 12 3.32
11 11 TU110 6 316644 2552 2 0 0 0 0 ... 0 6 0 0 0 0 0 5 12 3.32
12 12 TU120 5 316644 2552 2 0 0 0 0 ... 0 6 0 0 0 0 0 5 12 3.32
13 13 TU130 7 316644 2552 2 0 0 0 0 ... 0 6 0 0 0 0 0 5 12 3.32
14 14 TU122 7 316644 2552 3 0 0 0 0 ... 0 6 0 6 5 0 7 5 12 3.32
15 15 AT326 8 316644 2553 1 0 0 0 0 ... 0 6 0 6 5 7 7 5 12 3.32
16 16 CS213 6 316644 2553 1 0 0 0 0 ... 0 6 0 6 5 7 7 5 12 3.32
17 17 CS214 7 316644 2553 1 0 0 0 0 ... 0 6 0 6 5 7 7 5 12 3.32
18 18 CS222 7 316644 2553 1 0 0 0 0 ... 0 6 0 6 5 7 7 5 12 3.32
19 19 CS223 7 316644 2553 1 0 0 0 0 ... 0 6 0 6 5 7 7 5 12 3.32
20 20 CS284 7 316644 2553 1 0 0 0 0 ... 0 6 0 6 5 7 7 5 12 3.32
21 21 MA211 5 316644 2553 1 0 0 0 0 ... 0 6 0 6 5 7 7 5 12 3.32
22 22 SW111 5 316644 2553 1 0 0 0 0 ... 0 6 0 6 5 7 7 5 12 3.32
23 23 AT316 7 316644 2553 2 0 8 0 0 ... 0 6 0 6 5 7 7 5 12 3.32
24 24 CS251 6 316644 2553 2 0 8 0 0 ... 0 6 0 6 5 7 7 5 12 3.32
25 25 CS261 7 316644 2553 2 0 8 0 0 ... 0 6 0 6 5 7 7 5 12 3.32
26 26 CS281 7 316644 2553 2 0 8 0 0 ... 0 6 0 6 5 7 7 5 12 3.32
27 27 MA332 6 316644 2553 2 0 8 0 0 ... 0 6 0 6 5 7 7 5 12 3.32
28 28 SC135 6 316644 2553 2 0 8 0 0 ... 0 6 0 6 5 7 7 5 12 3.32
29 29 ST216 6 316644 2553 2 0 8 0 0 ... 0 6 0 6 5 7 7 5 12 3.32
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
27965 31292 EL070 2 447240 2557 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 48 3.75
27966 31293 MA211 4 447240 2557 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 48 3.75
27967 31294 ST216 4 447240 2557 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 48 3.75
27968 31295 TH161 6 447240 2557 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 48 3.75
27969 31296 TU154 4 447240 2557 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 48 3.75
27970 31297 CS101 5 447241 2557 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 13 2.60
27971 31298 CS102 5 447241 2557 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 13 2.60
27972 31299 CS105 5 447241 2557 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 13 2.60
27973 31300 EL070 2 447241 2557 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 13 2.60
27974 31301 MA211 3 447241 2557 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 13 2.60
27975 31302 ST216 3 447241 2557 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 13 2.60
27976 31303 TH161 5 447241 2557 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 13 2.60
27977 31304 TU154 3 447241 2557 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 13 2.60
27978 31313 CS101 5 447242 2557 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 34 2.93
27979 31314 CS102 5 447242 2557 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 34 2.93
27980 31315 CS105 5 447242 2557 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 34 2.93
27981 31316 EL171 3 447242 2557 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 34 2.93
27982 31317 MA211 3 447242 2557 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 34 2.93
27983 31318 ST216 3 447242 2557 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 34 2.93
27984 31319 TH161 6 447242 2557 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 34 2.93
27985 31320 TU154 5 447242 2557 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 34 2.93
27986 31325 SC185 3 447242 2557 2 0 0 0 0 ... 0 6 0 0 0 0 0 5 34 2.93
27987 31329 CS101 4 447243 2557 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 84 2.08
27988 31330 CS102 4 447243 2557 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 84 2.08
27989 31331 CS105 4 447243 2557 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 84 2.08
27990 31332 EL070 1 447243 2557 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 84 2.08
27991 31333 MA211 4 447243 2557 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 84 2.08
27992 31334 ST216 4 447243 2557 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 84 2.08
27993 31335 TH161 4 447243 2557 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 84 2.08
27994 31336 TU154 4 447243 2557 1 0 0 0 0 ... 0 0 0 0 0 0 0 0 84 2.08

27995 rows × 119 columns


In [117]:
count_courseId = df_file["3COURSEID"].value_counts() 
more20 = count_courseId

headers=list(df_file.columns.values)
subjects = []
countSub = 0

In [118]:
count = 0
subjects.sort()
precision_rf = {}
df_precision = more20.drop('CS231').copy()

# Placeholder containers; the subjects list itself is built in the next cell.
list_allsub = df_file.columns[4:]
allSubject_df = pd.DataFrame(columns=[subjects], index=[list_allsub])
top10_df = pd.DataFrame(columns=[subjects])

In [119]:
headers = list(df_file.columns.values)
subjects = []
countSub = 0
# Build the list of unique course IDs (headers[1] is the '3COURSEID' column)
for sub in df_file[headers[1]]:
    if sub not in subjects:
        subjects.append(sub)
        countSub = countSub + 1

In [120]:
subjects.sort()

Remove CS231 because it has no final grade.


In [121]:
subjects.remove('CS231')

In [122]:
len(subjects)


Out[122]:
110

In [123]:
subject = 'CS213'
print subject
df_sub = df_file[df_file['3COURSEID'] == subject]            # keep only rows for this course
df_sub = df_sub.iloc[np.random.permutation(len(df_sub))]     # shuffle the rows

A = df_sub.as_matrix()                                       # to a NumPy array (older form of .values)


CS213

In [124]:
df_sub


Out[124]:
Unnamed: 0.1 3COURSEID 4RESULT 0STUDENTID 1ACADYEAR 2SEMESTER AT316 AT326 BA291 CJ315 ... TA395 TH161 TU100 TU110 TU120 TU122 TU130 TU154 PROVINCEID SCHOOLGPA
18360 18815 CS213 5 383158 2555 1 0 0 0 0 ... 0 7 0 5 7 0 0 6 1 2.89
8248 8373 CS213 6 351677 2554 1 0 0 0 0 ... 0 7 0 6 7 0 0 5 76 3.52
26254 28313 CS213 4 427191 2557 1 0 0 0 0 ... 0 7 7 0 6 0 0 5 1 3.16
6012 6102 CS213 6 336767 2554 2 0 0 0 0 ... 0 7 0 5 5 0 6 6 1 2.96
22873 24077 CS213 6 402958 2556 1 0 0 0 0 ... 0 6 0 0 7 0 0 5 25 3.11
5925 6015 CS213 3 336766 2553 1 0 0 0 0 ... 0 5 0 0 7 0 0 6 1 2.71
7454 7561 CS213 8 351661 2554 1 0 0 0 0 ... 0 7 0 7 7 0 0 7 1 3.18
6765 6857 CS213 7 336787 2554 2 0 0 0 0 ... 0 6 0 6 5 0 6 5 1 3.10
20942 21822 CS213 5 397410 2556 1 0 0 0 0 ... 0 7 0 5 7 0 0 3 45 3.42
16141 16466 CS213 6 379808 2555 1 0 0 0 0 ... 0 7 0 6 6 6 0 6 72 3.43
3959 4027 CS213 5 336718 2554 2 0 0 0 0 ... 0 6 0 5 6 0 6 6 41 3.28
18457 18917 CS213 6 383161 2555 1 0 0 0 0 ... 0 7 0 5 7 0 0 6 1 3.30
20013 20706 CS213 7 397374 2556 1 0 0 0 0 ... 0 7 0 6 6 0 0 6 12 3.52
11660 11824 CS213 7 354659 2554 1 0 0 0 0 ... 0 7 0 0 7 0 0 8 92 3.29
20861 21727 CS213 7 397406 2556 1 0 0 0 0 ... 0 7 7 5 0 0 0 8 12 3.72
4564 4644 CS213 5 336731 2554 2 0 0 0 0 ... 0 7 0 5 8 0 7 6 66 3.14
2944 2997 CS213 6 329232 2553 1 0 0 0 0 ... 0 6 0 7 0 0 7 6 1 3.72
20705 21541 CS213 5 397400 2556 1 0 0 0 0 ... 0 7 0 5 6 0 0 5 1 3.70
18072 18511 CS213 7 383151 2555 1 0 0 0 0 ... 0 7 0 6 7 0 0 6 1 2.83
24150 25658 CS213 4 423592 2557 1 0 0 0 0 ... 0 6 7 5 5 0 6 6 12 3.29
748 765 CS213 3 329179 2553 1 0 0 0 0 ... 0 6 0 6 6 0 0 5 51 3.63
19149 19670 CS213 6 389416 2556 1 0 0 0 0 ... 0 8 0 6 6 0 0 6 16 3.49
12445 12618 CS213 7 361060 2554 1 0 0 0 0 ... 0 8 0 0 8 0 0 7 90 3.61
17383 17774 CS213 5 383127 2556 2 0 0 0 0 ... 0 7 0 6 7 0 5 4 30 3.49
21247 22188 CS213 6 397428 2556 1 0 0 0 0 ... 0 7 7 5 0 6 0 5 1 3.87
23053 24278 CS213 4 402966 2556 1 0 0 0 0 ... 0 6 0 0 7 0 0 3 1 2.35
22450 23596 CS213 4 402944 2556 1 0 0 0 0 ... 0 6 0 0 8 0 0 4 1 2.99
2802 2854 CS213 6 329229 2553 1 0 0 0 0 ... 0 7 0 7 6 0 0 7 90 3.32
19358 19929 CS213 7 392361 2556 1 0 0 0 0 ... 0 7 0 0 8 0 0 8 1 3.74
4840 4920 CS213 5 336737 2553 1 0 0 0 0 ... 0 6 0 0 5 0 0 6 1 2.55
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
16569 16915 CS213 5 380294 2555 1 0 0 0 0 ... 0 6 0 0 8 0 0 5 41 3.13
23788 25185 CS213 6 423575 2557 1 0 0 0 0 ... 0 8 7 7 7 5 6 5 20 3.62
7042 7140 CS213 7 344245 2554 1 0 0 0 0 ... 0 5 0 0 7 0 0 6 1 3.04
3848 3916 CS213 6 336715 2555 2 0 0 0 0 ... 0 6 0 5 6 0 7 5 53 3.12
2231 2274 CS213 7 329212 2553 1 0 0 0 0 ... 0 6 0 5 5 0 7 6 1 3.56
16991 17360 CS213 5 380309 2555 1 0 0 0 0 ... 0 6 0 0 8 0 0 3 1 2.64
11831 11995 CS213 6 354662 2555 2 0 0 5 0 ... 0 3 0 6 8 0 7 5 1 3.08
25456 27352 CS213 5 424324 2557 1 0 0 0 0 ... 0 7 7 0 8 0 0 5 32 3.06
24053 25532 CS213 4 423587 2557 1 0 0 0 0 ... 0 7 7 3 6 0 3 6 86 3.47
9041 9178 CS213 6 351695 2555 2 0 0 0 0 ... 0 7 0 6 7 7 7 5 15 3.57
9832 9981 CS213 6 351716 2554 1 0 0 0 0 ... 0 7 0 6 7 0 0 5 1 3.11
17208 17590 CS213 6 381730 2556 2 0 0 0 0 ... 0 7 0 5 7 0 5 5 12 3.31
22853 24057 CS213 3 402957 2557 1 0 0 0 0 ... 0 6 0 4 7 0 5 4 1 2.10
556 568 CS213 7 329174 2554 2 0 0 0 0 ... 0 7 0 5 5 6 6 6 1 3.69
1188 1210 CS213 6 329188 2555 1 0 8 0 0 ... 0 6 0 5 5 0 0 6 1 3.32
2851 2903 CS213 7 329230 2553 1 0 0 0 0 ... 0 6 0 5 6 0 7 6 61 3.62
585 599 CS213 6 329175 2553 1 0 0 0 0 ... 0 6 0 5 6 0 0 6 1 3.69
8546 8675 CS213 5 351684 2554 1 0 0 0 0 ... 0 7 0 6 5 0 0 5 86 3.33
3869 3937 CS213 5 336716 2553 1 0 0 0 0 ... 0 6 0 0 7 0 0 6 1 3.12
10517 10674 CS213 6 351734 2555 2 7 7 0 0 ... 0 7 0 6 7 0 6 5 1 3.61
19943 20622 CS213 5 397372 2556 1 0 0 0 0 ... 0 7 7 6 0 6 0 7 20 3.68
1413 1440 CS213 6 329193 2553 1 0 0 0 0 ... 0 6 0 5 5 0 0 7 1 3.50
26123 28152 CS213 4 427183 2557 1 0 0 0 0 ... 0 7 7 0 7 0 0 5 11 2.95
8299 8424 CS213 6 351678 2554 1 0 0 0 0 ... 0 7 0 7 7 0 0 6 80 3.67
9879 10028 CS213 5 351717 2554 1 0 0 0 0 ... 0 6 0 0 8 0 0 6 96 3.44
26700 28861 CS213 4 427882 2557 1 0 0 0 0 ... 0 7 7 0 7 0 0 3 1 2.21
13675 13862 CS213 8 368308 2555 1 0 0 0 0 ... 0 7 0 5 7 5 0 6 11 3.79
18281 18733 CS213 6 383156 2555 1 0 0 0 0 ... 0 7 0 5 7 0 0 5 11 3.08
21207 22141 CS213 6 397427 2556 1 0 0 0 0 ... 0 7 7 3 0 0 0 7 1 2.65
10855 11012 CS213 6 352724 2554 1 0 0 0 0 ... 0 6 0 0 7 0 0 3 1 3.54

765 rows × 119 columns


In [125]:
def classify( X ):
    # Load the pre-trained decision tree for CS213 and report its prediction.
    with open('tree_drop/treeCS213.pic', 'rb') as pickleFile:
        clf2 = pickle.load(pickleFile)
    pred = clf2.predict(X)
    # Reversed, the list lines up with the ordinal labels: 8 -> 'A', ..., 1 -> 'U', 0 -> 'na'.
    Grade = ['A', 'B', 'C', 'D', 'F', 'W', 'S', 'U', 'na']
    grade_predicted = Grade[::-1][int(pred[0])]
    grade_prop = clf2.predict_proba(X)
    print "prediction: ", grade_predicted
    print "prop is ", grade_prop
    print "clf is ", pred
    return

In [126]:
A[:,6:116]


Out[126]:
array([[0L, 0L, 0L, ..., 7L, 0L, 0L],
       [0L, 0L, 0L, ..., 7L, 0L, 0L],
       [0L, 0L, 0L, ..., 6L, 0L, 0L],
       ..., 
       [0L, 0L, 0L, ..., 7L, 0L, 0L],
       [0L, 0L, 0L, ..., 0L, 0L, 0L],
       [0L, 0L, 0L, ..., 7L, 0L, 0L]], dtype=object)
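
The slice A[:, 6:116] keeps 110 of the wide course-grade columns, skipping the leading record/ID columns, which matches the length of the pruned subjects list. A quick correspondence check (a sketch; feature_cols is just an illustrative name):

# Sketch: confirm the sliced feature block lines up with the subject list in width.
feature_cols = list(df_file.columns[6:116])    # the columns kept by A[:, 6:116]
print len(feature_cols), len(subjects)         # both should be 110
# Any name in one list but not the other would show up here:
# print sorted(set(feature_cols) ^ set(subjects))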

In [127]:
X = A[0,6:116]

In [128]:
X


Out[128]:
array([0L, 0L, 0L, 0L, 0L, 0L, 0L, 6L, 6L, 0L, 5L, 0L, 0L, 0L, 0L, 0L, 0L,
       0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
       0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
       0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
       0L, 0L, 0L, 0L, 2L, 6L, 6L, 0L, 0L, 0L, 0L, 0L, 0L, 7L, 0L, 0L, 0L,
       0L, 7L, 0L, 8L, 0L, 6L, 0L, 7L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
       0L, 0L, 7L, 0L, 5L, 7L, 0L, 0L], dtype=object)

In [129]:
X.shape


Out[129]:
(110L,)

In [130]:
classify( X );


prediction:  D
prop is  [[ 0.   0.1  0.8  0.1  0.   0. ]]
clf is  [5]
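
The prop row above has one column per grade class seen when the CS213 tree was trained, ordered by the fitted tree's classes_ attribute, so the probability attached to the predicted grade can be read off directly. A sketch, reusing the pickled CS213 tree and the X vector defined above:

# Sketch: pull out the probability of the predicted class via clf.classes_.
with open('tree_drop/treeCS213.pic', 'rb') as pickleFile:
    clf2 = pickle.load(pickleFile)
proba = clf2.predict_proba(X)[0]                     # one row of class probabilities
pred_label = clf2.predict(X)[0]                      # predicted ordinal label (5 above)
pred_index = list(clf2.classes_).index(pred_label)   # column of that label in classes_
print "probability of predicted grade: ", proba[pred_index]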

In [131]:
X = A[1:2,6:116]

In [132]:
X.shape


Out[132]:
(1L, 110L)
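
Unlike A[0, 6:116], slicing with A[1:2, 6:116] keeps the sample axis, giving the (n_samples, n_features) shape that scikit-learn's predict and predict_proba expect; older scikit-learn versions still accept the 1-D vector used earlier, as the run above shows. A 1-D row can also be given an explicit sample axis (a sketch; x_row and x_2d are illustrative names):

# Sketch: add an explicit sample axis to a 1-D feature vector before predicting.
x_row = A[0, 6:116]            # shape (110,)
x_2d = x_row.reshape(1, -1)    # shape (1, 110)
classify(x_2d)                 # should print the same prediction as classify(A[0, 6:116])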

In [133]:
classify( X );


prediction:  C
prop is  [[ 0.   0.   0.   0.9  0.   0.1]]
clf is  [6]

In [134]:
def classify( X ):
    # Run every subject's pre-trained tree on the same feature vector.
    Grade = ['A', 'B', 'C', 'D', 'F', 'W', 'S', 'U', 'na']
    for subject in subjects:
        f = "tree_drop/tree%s.pic" % subject
        with open(f, 'rb') as pickleFile:
            clf2 = pickle.load(pickleFile)
        pred = clf2.predict(X)
        grade_predicted = Grade[::-1][int(pred[0])]    # ordinal label -> grade name
        grade_prop = np.max(clf2.predict_proba(X))     # probability of the predicted class
        print "prediction of %s: " % subject, grade_predicted
        print "prop is ", grade_prop
        print "clf is ", pred
    return

In [135]:
classify( X );


prediction of AT316:  B
prop is  0.8
clf is  [7]
prediction of AT326:  B
prop is  0.7
clf is  [7]
prediction of BA291:  W
prop is  0.4
clf is  [3]
prediction of CJ315:  B
prop is  0.8
clf is  [7]
prediction of CJ316:  B
prop is  0.8
clf is  [7]
prediction of CJ317:  B
prop is  0.9
clf is  [7]
prediction of CJ321:  B
prop is  0.4
clf is  [7]
prediction of CS101:  C
prop is  0.7
clf is  [6]
prediction of CS102:  B
prop is  0.9
clf is  [7]
prediction of CS105:  D
prop is  0.297651006711
clf is  [5]
prediction of CS111:  B
prop is  0.6
clf is  [7]
prediction of CS115:  B
prop is  0.5
clf is  [7]
prediction of CS211:  C
prop is  0.4
clf is  [6]
prediction of CS213:  C
prop is  0.9
clf is  [6]
prediction of CS214:  A
prop is  0.7
clf is  [8]
prediction of CS215:  C
prop is  0.4
clf is  [6]
prediction of CS222:  B
prop is  0.8
clf is  [7]
prediction of CS223:  C
prop is  0.6
clf is  [6]
prediction of CS251:  F
prop is  0.3
clf is  [4]
prediction of CS261:  B
prop is  0.7
clf is  [7]
prediction of CS281:  C
prop is  0.6
clf is  [6]
prediction of CS284:  B
prop is  0.8
clf is  [7]
prediction of CS285:  W
prop is  0.5
clf is  [3]
prediction of CS286:  B
prop is  0.4
clf is  [7]
prediction of CS288:  A
prop is  0.5
clf is  [8]
prediction of CS289:  D
prop is  0.6
clf is  [5]
prediction of CS295:  C
prop is  0.4
clf is  [6]
prediction of CS296:  B
prop is  0.5
clf is  [7]
prediction of CS297:  C
prop is  0.6
clf is  [6]
prediction of CS300:  S
prop is  0.8
clf is  [2]
prediction of CS301:  B
prop is  0.7
clf is  [7]
prediction of CS302:  D
prop is  0.6
clf is  [5]
prediction of CS311:  C
prop is  0.7
clf is  [6]
prediction of CS314:  W
prop is  0.3
clf is  [3]
prediction of CS326:  C
prop is  0.6
clf is  [6]
prediction of CS341:  D
prop is  0.4
clf is  [5]
prediction of CS342:  C
prop is  0.8
clf is  [6]
prediction of CS348:  B
prop is  0.6
clf is  [7]
prediction of CS356:  C
prop is  0.8
clf is  [6]
prediction of CS365:  D
prop is  0.4
clf is  [5]
prediction of CS366:  W
prop is  0.5
clf is  [3]
prediction of CS367:  C
prop is  0.5
clf is  [6]
prediction of CS374:  D
prop is  0.7
clf is  [5]
prediction of CS377:  C
prop is  0.3
clf is  [6]
prediction of CS385:  C
prop is  0.6
clf is  [6]
prediction of CS386:  D
prop is  0.7
clf is  [5]
prediction of CS387:  D
prop is  0.5
clf is  [5]
prediction of CS388:  A
prop is  0.5
clf is  [8]
prediction of CS395:  W
prop is  0.4
clf is  [3]
prediction of CS396:  D
prop is  0.4
clf is  [5]
prediction of CS397:  B
prop is  0.5
clf is  [7]
prediction of CS398:  C
prop is  0.5
clf is  [6]
prediction of CS399:  B
prop is  0.6
clf is  [7]
prediction of CS401:  C
prop is  0.4
clf is  [6]
prediction of CS402:  C
prop is  0.3
clf is  [6]
prediction of CS407:  B
prop is  0.5
clf is  [7]
prediction of CS408:  B
prop is  0.8
clf is  [7]
prediction of CS409:  D
prop is  0.5
clf is  [5]
prediction of CS426:  C
prop is  0.7
clf is  [6]
prediction of CS427:  C
prop is  0.6
clf is  [6]
prediction of CS429:  A
prop is  0.4
clf is  [8]
prediction of CS446:  C
prop is  0.5
clf is  [6]
prediction of CS449:  B
prop is  0.9
clf is  [7]
prediction of CS456:  W
prop is  0.4
clf is  [3]
prediction of CS457:  B
prop is  0.6
clf is  [7]
prediction of CS459:  C
prop is  0.5
clf is  [6]
prediction of CS467:  A
prop is  0.4
clf is  [8]
prediction of CS486:  C
prop is  0.5
clf is  [6]
prediction of CS487:  C
prop is  0.5
clf is  [6]
prediction of CS488:  A
prop is  0.8
clf is  [8]
prediction of CS489:  C
prop is  0.5
clf is  [6]
prediction of EL070:  S
prop is  0.964717183182
clf is  [2]
prediction of EL171:  D
prop is  0.4
clf is  [5]
prediction of EL172:  C
prop is  0.6
clf is  [6]
prediction of EL295:  C
prop is  0.9
clf is  [6]
prediction of EL395:  B
prop is  0.4
clf is  [7]
prediction of ES356:  B
prop is  1.0
clf is  [7]
prediction of HO201:  B
prop is  0.9
clf is  [7]
prediction of HR201:  B
prop is  0.6
clf is  [7]
prediction of LA209:  B
prop is  1.0
clf is  [7]
prediction of MA211:  C
prop is  0.5
clf is  [6]
prediction of MA212:  B
prop is  1.0
clf is  [7]
prediction of MA216:  W
prop is  0.8
clf is  [3]
prediction of MA332:  W
prop is  0.3
clf is  [3]
prediction of MW313:  B
prop is  0.9
clf is  [7]
prediction of MW314:  B
prop is  0.6
clf is  [7]
prediction of NS132:  B
prop is  1.0
clf is  [7]
prediction of PY228:  D
prop is  0.5
clf is  [5]
prediction of SC123:  F
prop is  0.5
clf is  [4]
prediction of SC135:  F
prop is  0.6
clf is  [4]
prediction of SC173:  B
prop is  0.5
clf is  [7]
prediction of SC185:  C
prop is  0.57
clf is  [6]
prediction of SO201:  W
prop is  0.7
clf is  [3]
prediction of ST216:  B
prop is  0.5
clf is  [7]
prediction of SW111:  B
prop is  0.8
clf is  [7]
prediction of SW212:  B
prop is  0.5
clf is  [7]
prediction of SW213:  B
prop is  1.0
clf is  [7]
prediction of SW221:  B
prop is  0.9
clf is  [7]
prediction of SW335:  A
prop is  0.5
clf is  [8]
prediction of SW365:  B
prop is  1.0
clf is  [7]
prediction of SW475:  A
prop is  0.6
clf is  [8]
prediction of SW478:  B
prop is  0.9
clf is  [7]
prediction of TA395:  A
prop is  1.0
clf is  [8]
prediction of TH161:  B
prop is  0.94638487208
clf is  [7]
prediction of TU100:  B
prop is  0.5
clf is  [7]
prediction of TU110:  C
prop is  0.9
clf is  [6]
prediction of TU120:  B
prop is  0.6
clf is  [7]
prediction of TU122:  C
prop is  0.4
clf is  [6]
prediction of TU130:  B
prop is  0.5
clf is  [7]
prediction of TU154:  D
prop is  0.5
clf is  [5]

In [136]:
X


Out[136]:
array([[0L, 0L, 0L, 0L, 0L, 0L, 0L, 7L, 6L, 0L, 7L, 0L, 0L, 0L, 0L, 0L, 0L,
        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
        0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
        0L, 0L, 0L, 0L, 0L, 7L, 6L, 0L, 0L, 0L, 0L, 0L, 0L, 3L, 0L, 0L, 0L,
        0L, 0L, 0L, 7L, 0L, 6L, 0L, 7L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
        0L, 0L, 7L, 0L, 6L, 7L, 0L, 0L]], dtype=object)

In [137]:
X[-1]


Out[137]:
array([0L, 0L, 0L, 0L, 0L, 0L, 0L, 7L, 6L, 0L, 7L, 0L, 0L, 0L, 0L, 0L, 0L,
       0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
       0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
       0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
       0L, 0L, 0L, 0L, 0L, 7L, 6L, 0L, 0L, 0L, 0L, 0L, 0L, 3L, 0L, 0L, 0L,
       0L, 0L, 0L, 7L, 0L, 6L, 0L, 7L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L, 0L,
       0L, 0L, 7L, 0L, 6L, 7L, 0L, 0L], dtype=object)

2. Load trees and predict each subject


In [157]:
y = []
probability = []
def classify( X ):
    # For each subject: if the student has no grade yet (0), predict one with that
    # subject's tree; otherwise keep the grade already earned.
    Grade = ['A', 'B', 'C', 'D', 'F', 'W', 'S', 'U', 'na']
    for i in range(0, 110):
        subject = subjects[i]
        if X[i] == 0:
            f = "tree_drop/tree%s.pic" % subject
            with open(f, 'rb') as pickleFile:
                clf2 = pickle.load(pickleFile)
            pred = clf2.predict(X)
            grade_predicted = Grade[::-1][int(pred[0])]
            prob = np.max(clf2.predict_proba(X))
            print "prediction of %s: " % subject, grade_predicted
            print "probability is ", prob
            probability.append(prob)
            y.append(grade_predicted)
        else:
            grade_truth = Grade[::-1][int(X[i])]
            prob = "no probability"
            print "grade of %s is already " % subject, grade_truth
            print prob
            probability.append(prob)
            y.append(grade_truth)
    print "list of all grade predicted is %s" % y
    return

In [154]:
X=X[-1]

In [155]:
classify( X );


prediction of AT316:  B
probabity is  0.8
prediction of AT326:  B
probabity is  0.7
prediction of BA291:  W
probabity is  0.4
prediction of CJ315:  B
probabity is  0.8
prediction of CJ316:  B
probabity is  0.8
prediction of CJ317:  B
probabity is  0.9
prediction of CJ321:  B
probabity is  0.4
grade CS101 has already is  B
no probability
grade CS102 has already is  C
no probability
prediction of CS105:  D
probabity is  0.297651006711
grade CS111 has already is  B
no probability
prediction of CS115:  B
probabity is  0.5
prediction of CS211:  C
probabity is  0.4
prediction of CS213:  C
probabity is  0.9
prediction of CS214:  A
probabity is  0.7
prediction of CS215:  C
probabity is  0.4
prediction of CS222:  B
probabity is  0.8
prediction of CS223:  C
probabity is  0.6
prediction of CS251:  F
probabity is  0.3
prediction of CS261:  B
probabity is  0.7
prediction of CS281:  C
probabity is  0.6
prediction of CS284:  B
probabity is  0.8
prediction of CS285:  W
probabity is  0.5
prediction of CS286:  B
probabity is  0.4
prediction of CS288:  A
probabity is  0.5
prediction of CS289:  D
probabity is  0.6
prediction of CS295:  C
probabity is  0.4
prediction of CS296:  B
probabity is  0.5
prediction of CS297:  C
probabity is  0.6
prediction of CS300:  S
probabity is  0.8
prediction of CS301:  B
probabity is  0.7
prediction of CS302:  D
probabity is  0.6
prediction of CS311:  C
probabity is  0.7
prediction of CS314:  W
probabity is  0.3
prediction of CS326:  C
probabity is  0.6
prediction of CS341:  D
probabity is  0.4
prediction of CS342:  C
probabity is  0.8
prediction of CS348:  B
probabity is  0.6
prediction of CS356:  C
probabity is  0.8
prediction of CS365:  D
probabity is  0.4
prediction of CS366:  W
probabity is  0.5
prediction of CS367:  C
probabity is  0.5
prediction of CS374:  D
probabity is  0.7
prediction of CS377:  C
probabity is  0.3
prediction of CS385:  C
probabity is  0.6
prediction of CS386:  D
probabity is  0.7
prediction of CS387:  D
probabity is  0.5
prediction of CS388:  A
probabity is  0.5
prediction of CS395:  W
probabity is  0.4
prediction of CS396:  D
probabity is  0.4
prediction of CS397:  B
probabity is  0.5
prediction of CS398:  C
probabity is  0.5
prediction of CS399:  B
probabity is  0.6
prediction of CS401:  C
probabity is  0.4
prediction of CS402:  C
probabity is  0.3
prediction of CS407:  B
probabity is  0.5
prediction of CS408:  B
probabity is  0.8
prediction of CS409:  D
probabity is  0.5
prediction of CS426:  C
probabity is  0.7
prediction of CS427:  C
probabity is  0.6
prediction of CS429:  A
probabity is  0.4
prediction of CS446:  C
probabity is  0.5
prediction of CS449:  B
probabity is  0.9
prediction of CS456:  W
probabity is  0.4
prediction of CS457:  B
probabity is  0.6
prediction of CS459:  C
probabity is  0.5
prediction of CS467:  A
probabity is  0.4
prediction of CS486:  C
probabity is  0.5
prediction of CS487:  C
probabity is  0.5
prediction of CS488:  A
probabity is  0.8
prediction of CS489:  C
probabity is  0.5
prediction of EL070:  S
probabity is  0.964717183182
prediction of EL171:  D
probabity is  0.4
grade EL172 has already is  B
no probability
grade EL295 has already is  C
no probability
prediction of EL395:  B
probabity is  0.4
prediction of ES356:  B
probabity is  1.0
prediction of HO201:  B
probabity is  0.9
prediction of HR201:  B
probabity is  0.6
prediction of LA209:  B
probabity is  1.0
prediction of MA211:  C
probabity is  0.5
grade MA212 has already is  W
no probability
prediction of MA216:  W
probabity is  0.8
prediction of MA332:  W
probabity is  0.3
prediction of MW313:  B
probabity is  0.9
prediction of MW314:  B
probabity is  0.6
prediction of NS132:  B
probabity is  1.0
prediction of PY228:  D
probabity is  0.5
grade SC123 has already is  B
no probability
prediction of SC135:  F
probabity is  0.6
grade SC173 has already is  C
no probability
prediction of SC185:  C
probabity is  0.57
grade SO201 has already is  B
no probability
prediction of ST216:  B
probabity is  0.5
prediction of SW111:  B
probabity is  0.8
prediction of SW212:  B
probabity is  0.5
prediction of SW213:  B
probabity is  1.0
prediction of SW221:  B
probabity is  0.9
prediction of SW335:  A
probabity is  0.5
prediction of SW365:  B
probabity is  1.0
prediction of SW475:  A
probabity is  0.6
prediction of SW478:  B
probabity is  0.9
prediction of TA395:  A
probabity is  1.0
prediction of TH161:  B
probabity is  0.94638487208
grade TU100 has already is  B
no probability
prediction of TU110:  C
probabity is  0.9
grade TU120 has already is  C
no probability
grade TU122 has already is  B
no probability
prediction of TU130:  B
probabity is  0.5
prediction of TU154:  D
probabity is  0.5
list of all grade predicted is ['B', 'B', 'W', 'B', 'B', 'B', 'B', 'B', 'C', 'D', 'B', 'B', 'C', 'C', 'A', 'C', 'B', 'C', 'F', 'B', 'C', 'B', 'W', 'B', 'A', 'D', 'C', 'B', 'C', 'S', 'B', 'D', 'C', 'W', 'C', 'D', 'C', 'B', 'C', 'D', 'W', 'C', 'D', 'C', 'C', 'D', 'D', 'A', 'W', 'D', 'B', 'C', 'B', 'C', 'C', 'B', 'B', 'D', 'C', 'C', 'A', 'C', 'B', 'W', 'B', 'C', 'A', 'C', 'C', 'A', 'C', 'S', 'D', 'B', 'C', 'B', 'B', 'B', 'B', 'B', 'C', 'W', 'W', 'W', 'B', 'B', 'B', 'D', 'B', 'F', 'C', 'C', 'B', 'B', 'B', 'B', 'B', 'B', 'A', 'B', 'A', 'B', 'A', 'B', 'B', 'C', 'C', 'B', 'B', 'D']
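
Since y and probability accumulate one entry per subject (a predicted grade where the student has no result yet, or the grade already on record), they can be paired back with the subject codes and ranked by classifier confidence, for example to fill a table like the top10_df created earlier. A sketch, assuming a single run of classify so the three lists line up, and pandas >= 0.17 for sort_values:

# Sketch: rank the not-yet-taken courses by the confidence of their predicted grade.
results = pd.DataFrame({'subject': subjects, 'grade': y, 'probability': probability})
predicted_only = results[results['probability'] != "no probability"].copy()
predicted_only['probability'] = predicted_only['probability'].astype(float)
top10 = predicted_only.sort_values('probability', ascending=False).head(10)
print top10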