In [1]:
#NAIVE BAYES
### Fit a Gaussian Naive Bayes classifier on the data returned by
### email_preprocess.preprocess(), time the fit and predict calls
### separately, and print the accuracy on the test split.
import sys
from time import time
sys.path.append("tools/")
from email_preprocess import preprocess
### features_train and features_test are the features for the training
### and testing datasets, respectively
### labels_train and labels_test are the corresponding item labels
features_train, features_test, labels_train, labels_test = preprocess()
#########################################################
### your code goes here ###
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import accuracy_score
clf = GaussianNB()
### t0 brackets only the fit call, so the reported time excludes preprocessing
t0 = time()
clf.fit(features_train, labels_train)
### Single-argument print(...) parses and prints identically on Python 2
### and Python 3; the bare print statement is a SyntaxError on Python 3.
### The format strings reproduce the text the old statements emitted.
print("training time:  {0} s".format(round(time() - t0, 3)))
### t1 brackets only the predict call
t1 = time()
pred = clf.predict(features_test)
print("predicting time:  {0} s".format(round(time() - t1, 3)))
accuracy = accuracy_score(labels_test, pred)
print(accuracy)
In [2]:
#SVM
### Fit an RBF-kernel support vector classifier on the first 1% of the
### training data returned by email_preprocess.preprocess(), time the fit
### and predict calls separately, and print the accuracy on the test split.
import sys
from time import time
sys.path.append("../tools/")
from email_preprocess import preprocess
### features_train and features_test are the features for the training
### and testing datasets, respectively
### labels_train and labels_test are the corresponding item labels
features_train, features_test, labels_train, labels_test = preprocess()
#########################################################
### your code goes here ###
from sklearn import svm
from sklearn.metrics import accuracy_score
### The classifier uses the RBF kernel; the variable name and the printed
### labels below now say so (they previously claimed a linear kernel).
rbf_kernel_svm = svm.SVC(kernel='rbf', C=10000.)
### Keep only the first 1% of the training set. Floor division (//) keeps
### the slice bound an int on Python 3, where / would produce a float and
### raise TypeError; on Python 2 the result is unchanged.
features_train = features_train[:len(features_train) // 100]
labels_train = labels_train[:len(labels_train) // 100]
### t0 brackets only the fit call
t0 = time()
rbf_kernel_svm.fit(features_train, labels_train)
### Single-argument print(...) behaves the same on Python 2 and Python 3.
print("training time with SVM's rbf kernel {0}".format(time() - t0))
### t1 brackets only the predict call
t1 = time()
pred = rbf_kernel_svm.predict(features_test)
print("prediction time with SVM's rbf kernel {0}".format(time() - t1))
acc = accuracy_score(labels_test, pred)
print(acc)
#########################################################
def time_with_power(power, people, times):
    """Return the sum of ``times + 1`` draws from a power distribution.

    One sample of size ``people`` is drawn with ``numpy.random.power`` and
    ``times`` further samples of the same size are added to it element-wise.

    Args:
        power: Distribution parameter forwarded to ``numpy.random.power``
            as its first argument.
        people: Size of every sample, forwarded to ``numpy.random.power``
            as its ``size`` argument.
        times: Number of additional samples added to the initial one;
            ``0`` returns the initial sample unchanged.

    Returns:
        The accumulated sample, with the shape implied by ``people``.
    """
    # The original referenced an undefined name ``nd``; numpy is imported
    # here so the function does not depend on a module-level alias.
    import numpy as np

    results = np.random.power(power, people)
    for _ in range(times):
        # Draw the same size as the initial sample. The previous hard-coded
        # 1000 made the in-place add fail for any other integer ``people``.
        results += np.random.power(power, people)
    return results
In [ ]:
#DECISION TREE
### Fit a decision tree classifier on the data returned by
### email_preprocess.preprocess(), print the shape of the training
### features, and print the accuracy on the test split.
import sys
from time import time
sys.path.append("../tools/")
from email_preprocess import preprocess
### features_train and features_test are the features for the training
### and testing datasets, respectively
### labels_train and labels_test are the corresponding item labels
features_train, features_test, labels_train, labels_test = preprocess()
### Single-argument print(...) parses and prints identically on Python 2
### and Python 3; the bare print statement is a SyntaxError on Python 3.
### The format strings reproduce the text the old statements emitted.
print("Size of features matrix:  {0}".format(features_train.shape))
#########################################################
### your code goes here ###
from sklearn import tree
from sklearn.metrics import accuracy_score
### min_samples_split=40: a node is only split when it holds at least
### 40 samples
clf = tree.DecisionTreeClassifier(min_samples_split=40)
clf.fit(features_train, labels_train)
pred = clf.predict(features_test)
acc = accuracy_score(labels_test, pred)
print("Accuracy:  {0}".format(acc))
#########################################################