In [1]:
import pickle
import sys

# Extend the import path so that `feature_format` (expected under ../tools/)
# can be imported below.
sys.path.append("../tools/")
from feature_format import featureFormat, targetFeatureSplit

# Load the project dataset into `data_dict`.
# Pickle data is binary, so the file is opened in "rb" mode (text mode raises
# a TypeError on Python 3), and the `with` block guarantees the file handle is
# closed as soon as loading finishes instead of being leaked.
# NOTE(review): pickle.load can execute arbitrary code -- only load this file
# from a trusted source.
with open("../final_project/final_project_dataset.pkl", "rb") as dataset_file:
    data_dict = pickle.load(dataset_file)
In [2]:
# The label column must come first; every name after it is treated as a
# predictor feature. Keep this list unchanged for the mini-project -- the
# final project will use a different feature list.
features_list = [
    "poi",     # label
    "salary",  # predictor
]

# Run the dataset through featureFormat for the selected columns, then split
# the result into labels and features.
data = featureFormat(data_dict, features_list)
labels, features = targetFeatureSplit(data)
In [3]:
from sklearn.tree import DecisionTreeClassifier
In [4]:
# Train on the complete dataset and score on those very same points. Nothing
# is held out here, so the displayed value is the score on the training data.
clf = DecisionTreeClassifier().fit(features, labels)
clf.score(features, labels)
Out[4]:
In [5]:
# `sklearn.cross_validation` was deprecated in scikit-learn 0.18 and removed in
# 0.20; `train_test_split` now lives in `sklearn.model_selection`. Fall back to
# the old module only so that legacy installs keep working.
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    from sklearn.cross_validation import train_test_split
In [6]:
# Hold out 30% of the samples for testing; the fixed random_state makes the
# split repeatable from run to run.
(features_train, features_test,
 labels_train, labels_test) = train_test_split(
    features, labels, test_size=0.3, random_state=42
)
In [7]:
# Fit a fresh tree on the training portion only, then display its score on
# the held-out test portion.
clf = DecisionTreeClassifier().fit(X=features_train, y=labels_train)
clf.score(X=features_test, y=labels_test)
Out[7]: