In [1]:
%matplotlib inline

In [7]:
%load ../ud120-projects/evaluation/evaluate_poi_identifier.py

In [29]:
#%%writefile ../ud120-projects/evaluation/evaluate_poi_identifier.py
#!/usr/bin/python


"""
    starter code for the evaluation mini-project
    start by copying the trained/tested POI identifier
    that you built in the validation mini-project

    the second step toward building your POI identifier!

    start by loading/formatting the data

"""

import pickle
import sys
sys.path.append("../ud120-projects/tools/")
from feature_format import featureFormat, targetFeatureSplit

data_dict = pickle.load(open("../ud120-projects/final_project/final_project_dataset.pkl", "r") )

### add more features to features_list!
features_list = ["poi", "salary"]

data = featureFormat(data_dict, features_list)
labels, features = targetFeatureSplit(data)

### Split the data into training and test sets

from sklearn.cross_validation import train_test_split

train_features, test_features, train_labels, test_labels = train_test_split(features, labels,
                                                                            test_size=0.3,
                                                                            random_state=42)

### Train a decision tree and evaluate it on the held-out test set
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix
from sklearn.metrics import classification_report

clf = DecisionTreeClassifier()
clf.fit(train_features, train_labels)

pred = clf.predict(test_features)

print "Confusion Matrix:\n", confusion_matrix(test_labels, pred), "\n"
print "Classification Report:\n", classification_report(test_labels, pred)
print "Accuracy:", accuracy_score(test_labels, pred)


Overwriting ../ud120-projects/evaluation/evaluate_poi_identifier.py
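For reference when reading the printed matrix: sklearn's confusion_matrix puts the true labels on the rows and the predictions on the columns, so the 2x2 array reads [[TN, FP], [FN, TP]]. A minimal sketch of that layout on made-up labels (not part of the project data):

# rows = true class, columns = predicted class -> [[TN, FP], [FN, TP]]
from sklearn.metrics import confusion_matrix
print confusion_matrix([0, 0, 1, 1], [0, 1, 1, 1])
# [[1 1]
#  [0 2]]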

In [13]:
sum(test_labels)   # number of POIs in the test set


Out[13]:
5.0

In [16]:
len(test_labels)


Out[16]:
29

In [17]:
24./29   # fraction of non-POI labels in the test set


Out[17]:
0.8275862068965517
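There are 5 POIs among the 29 test points, so 24/29 ≈ 0.83 is exactly the accuracy a classifier would get by predicting "not POI" for everyone. A quick check of that baseline (a sketch, reusing test_labels and accuracy_score from above):

# hypothetical all-zeros baseline: label every test point as a non-POI
baseline_pred = [0.0] * len(test_labels)
print "baseline accuracy:", accuracy_score(test_labels, baseline_pred)   # ~0.828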

In [18]:
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score

In [20]:
precision_score(test_labels, pred)


Out[20]:
0.0

In [21]:
recall_score(test_labels, pred)


Out[21]:
0.0
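Precision and recall are both zero because the tree produces no true positives on this split: every predicted POI is wrong and every real POI is missed. The counts can be pulled straight from the confusion matrix (a sketch, reusing pred and test_labels from above):

# unpack the 2x2 confusion matrix into its four counts
tn, fp, fn, tp = confusion_matrix(test_labels, pred).ravel()
print "true positives: ", tp   # 0 -> precision = recall = 0
print "false positives:", fp
print "false negatives:", fn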

In [23]:
predictions = [0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1] 
true_labels = [0, 0, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 0, 1, 0, 0]
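Counting over these hand-made lists gives 6 true positives (pred 1, true 1), 3 false positives (pred 1, true 0), and 2 false negatives (pred 0, true 1); the next two cells use those counts. A sketch of the tally with zip:

# tally the toy example by hand
tp = sum(1 for p, t in zip(predictions, true_labels) if p == 1 and t == 1)   # 6
fp = sum(1 for p, t in zip(predictions, true_labels) if p == 1 and t == 0)   # 3
fn = sum(1 for p, t in zip(predictions, true_labels) if p == 0 and t == 1)   # 2
print tp, fp, fn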

In [27]:
(6)/9.   # precision on the toy example: TP / (TP + FP)


Out[27]:
0.6666666666666666

In [28]:
6/8.   # recall on the toy example: TP / (TP + FN)


Out[28]:
0.75
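These are the standard definitions applied to the toy counts: precision = TP / (TP + FP) = 6/9 and recall = TP / (TP + FN) = 6/8. The same numbers come back from sklearn's metric functions (a sketch, reusing the lists defined above):

print "precision:", precision_score(true_labels, predictions)   # 0.666...
print "recall:   ", recall_score(true_labels, predictions)      # 0.75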

In [ ]: