Develop a model that can be used to make accurate predictions. The workflow below generates (and later loads) data, splits it into training and test sets, fits an SVM classifier, evaluates it with a confusion matrix and cross-validation, visualizes the predictions, and tunes the hyperparameters with a grid search.
Images: www.cs.utexas.edu/~mooney/cs391L/slides/svm.ppt
Image: www.cs.colostate.edu/~asa/pdfs/howto.pdf
In [4]:
from sklearn.datasets import make_moons
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
# Create a 2D data set
X, Y = make_moons(noise=0.15,random_state = 1,n_samples = 1000)
# Identify sample classes
class0_actual = np.where(Y==0);
class1_actual = np.where(Y==1);
# Plot data
plt.scatter(X[class0_actual,0],X[class0_actual,1],c = 'yellow')
plt.scatter(X[class1_actual,0],X[class1_actual,1],c = 'purple')
Out[4]:
In [ ]:
# Your code goes here
In [ ]:
# Your code goes here
In [ ]:
# Your code goes here
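One possible fill-in for the exercise cells above, inferred from the variables the later cells rely on (clf, X_train, X_test, Y_train, Y_test): split the moons data into training and test sets and fit a linear-kernel SVM as an initial guess. The 70/30 split, random_state, and C=1 are assumptions, not the only valid choices.
In [ ]:
from sklearn import svm
from sklearn.model_selection import train_test_split
# Hold out 30% of the samples for testing (split ratio is an assumption)
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=0)
# Initial guess: linear-kernel SVM with C = 1
clf = svm.SVC(kernel='linear', C=1).fit(X_train, Y_train)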
In [ ]:
from sklearn.metrics import confusion_matrix
Y_pred = clf.predict(X_test);
cm = confusion_matrix(Y_test, Y_pred)
print("Confusion Matrix:")
print(cm)
In [ ]:
from sklearn.model_selection import cross_val_score
scores = cross_val_score(clf, X, Y, cv=10, scoring='accuracy')
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
In [ ]:
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
Y_pred = clf.predict(X);
# Identify sample classes
class0 = np.where(Y_pred==0);
class1 = np.where(Y_pred==1);
# Plot data
plt.subplot(1, 2, 1)
plt.scatter(X[class0,0],X[class0,1],c = 'r')
plt.scatter(X[class1,0],X[class1,1],c = 'b')
plt.title('Predicted Classes')
plt.subplot(1,2,2)
plt.scatter(X[class0_actual,0],X[class0_actual,1],c = 'green')
plt.scatter(X[class1_actual,0],X[class1_actual,1],c = 'brown')
plt.title('Actual Classes')
In [ ]:
clf = svm.SVC(kernel='rbf',gamma=1,C=1).fit(X_train,Y_train)
In [ ]:
scores = cross_val_score(clf, X, Y, cv=10, scoring='accuracy')
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
In [ ]:
from sklearn.metrics import confusion_matrix
Y_pred = clf.predict(X_test);
cm = confusion_matrix(Y_test, Y_pred)
print("Confusion Matrix:")
print(cm)
In [ ]:
import numpy as np
# Get predicted classes for all data
Y_pred = clf.predict(X);
# Identify sample classes
class0 = np.where(Y_pred==0);
class1 = np.where(Y_pred==1);
# Plot data
plt.subplot(1, 2, 1)
plt.scatter(X[class0,0],X[class0,1],c = 'r')
plt.scatter(X[class1,0],X[class1,1],c = 'b')
plt.title('Predicted Classes')
plt.subplot(1,2,2)
plt.scatter(X[class0_actual,0],X[class0_actual,1],c = 'green')
plt.scatter(X[class1_actual,0],X[class1_actual,1],c = 'brown')
plt.title('Actual Classes')
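The side-by-side scatter plots show which label each point receives, but not where the fitted boundary lies. Below is a minimal sketch (not part of the original exercise) that shades the decision regions of whichever classifier is currently stored in clf by classifying every point of a mesh grid over the two features; the grid resolution and transparency are arbitrary choices.
In [ ]:
import numpy as np
import matplotlib.pyplot as plt
# Build a dense grid covering the range of the two features
xx, yy = np.meshgrid(np.linspace(X[:,0].min()-0.5, X[:,0].max()+0.5, 300),
                     np.linspace(X[:,1].min()-0.5, X[:,1].max()+0.5, 300))
# Classify every grid point and reshape back to the grid
Z = clf.predict(np.c_[xx.ravel(), yy.ravel()]).reshape(xx.shape)
# Shade the predicted regions and overlay the actual samples
plt.contourf(xx, yy, Z, alpha=0.3)
plt.scatter(X[:,0], X[:,1], c=Y, s=10)
plt.title('Decision Regions')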
In [ ]:
from sklearn.model_selection import GridSearchCV
tuned_parameters = [{'kernel': ['rbf'], 'gamma': [1e-3, 1e-2, 1], 'C': [1, 10, 100]},
                    {'kernel': ['linear'], 'C': [1, 10, 100]}]
clf = GridSearchCV(svm.SVC(), tuned_parameters, cv=10, verbose=2)
clf.fit(X, Y)
In [ ]:
clf_BestEstimator = clf.best_estimator_
print(clf_BestEstimator)
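Besides best_estimator_, GridSearchCV also records the winning parameter combination and its mean cross-validated accuracy; a short sketch of inspecting them:
In [ ]:
# Best hyperparameter combination found by the grid search
print(clf.best_params_)
# Mean cross-validated accuracy of that combination
print("Best CV accuracy: %0.2f" % clf.best_score_)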
In [ ]:
print("Initial Guess:")
clf = svm.SVC(kernel='rbf',gamma = 1, C=1);
scores = cross_val_score(clf, X, Y, cv=10)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
print("")
print("Optimal Parameters (Grid Search):")
clf = svm.SVC(kernel='rbf',gamma = 1, C=100);
scores = cross_val_score(clf, X, Y, cv=10)
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
In [ ]:
from numpy import genfromtxt, savetxt
dataset = genfromtxt(open('/Users/annette/AMT_Work/BiologicalResponse/train.csv','r'), delimiter=',', dtype='f8')[1:]
Y = [x[0] for x in dataset]
X = [x[1:] for x in dataset]
# Integer IDs for the 1776 descriptor (feature) columns
featureID = list(range(1776))
In [ ]:
from sklearn.model_selection import train_test_split
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.3, random_state=0)
In [ ]:
from sklearn import svm
clf = svm.SVC(kernel='linear', C=1).fit(X_train, Y_train)
In [ ]:
scores = cross_val_score(clf, X, Y, cv=10, scoring='accuracy')
print("Accuracy: %0.2f (+/- %0.2f)" % (scores.mean(), scores.std() * 2))
In [ ]: