notebook.community

Edit and run



In [2]:

    
import matplotlib.pyplot as plt
import numpy as np

%matplotlib inline



In [148]:

    
# create a sample data set for regression
X = (np.random.uniform(0, 1, size=100)).reshape(100,1)
y = 2 + 3*X + np.random.normal(0,0.25, size=100).reshape(100,1)

print X.shape
y.shape









    



(100, 1)






    Out[148]:





(100, 1)



In [154]:

    
plt.plot(X, y, "k.")









    Out[154]:





[<matplotlib.lines.Line2D at 0x7f04c901c310>]



In [150]:

    
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import SGDRegressor

knn3 = KNeighborsRegressor(n_neighbors=3).fit(X, y)
knn15 = KNeighborsRegressor(n_neighbors=15).fit(X, y)
lr = LinearRegression().fit(X, y)
sgd = SGDRegressor(loss='squared_loss', penalty='none', learning_rate='constant', eta0=0.1, n_iter=200).fit(X, y)



In [151]:

    
plt.figure(figsize=(12,9))
plt.plot(X, y, "k.")
knn_x = np.linspace(0, 1, num=80).reshape(80,1)
line_x = np.array([[0], [1]])
plt.plot(knn_x, knn3.predict(knn_x), label='3NN')
plt.plot(knn_x, knn15.predict(knn_x), label='15NN')
plt.plot(line_x, lr.predict(line_x), label='LR')
plt.plot(line_x, sgd.predict(line_x), "--", label='SGD')
plt.legend()









    Out[151]:





<matplotlib.legend.Legend at 0x7f04c9251a50>



In [162]:

    
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=100, n_features=2, n_informative=2, n_redundant=0, n_classes=2, 
                           n_clusters_per_class=2)

print X.shape
y.shape









    



(100, 2)






    Out[162]:





(100,)



In [163]:

    
plt.scatter(X[:, 0], X[:, 1], c=y)









    Out[163]:





<matplotlib.collections.PathCollection at 0x7f04ca763ad0>



In [167]:

    
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier

from sklearn.metrics import accuracy_score

knn = KNeighborsClassifier(n_neighbors=5).fit(X, y)
lrc = LogisticRegression().fit(X, y)
svm = SVC().fit(X, y)
net = MLPClassifier(hidden_layer_sizes=(15, 10), activation='logistic', learning_rate_init=0.1).fit(X, y)
rf = RandomForestClassifier(n_estimators=20).fit(X, y)
ab = AdaBoostClassifier(n_estimators=20).fit(X, y)



In [168]:

    
print "KNN Training Accuracy: {}".format(accuracy_score(y, knn.predict(X)))
print "Logistic Regression Training Accuracy: {}".format(accuracy_score(y, lrc.predict(X)))
print "SVM Training Accuracy: {}".format(accuracy_score(y, svm.predict(X)))
print "Neural Network Training Accuracy: {}".format(accuracy_score(y, net.predict(X)))
print "Random Forest Training Accuracy: {}".format(accuracy_score(y, rf.predict(X)))
print "AdaBoost Training Accuracy: {}".format(accuracy_score(y, ab.predict(X)))









    



KNN Training Accuracy: 0.9
Logistic Regression Training Accuracy: 0.89
SVM Training Accuracy: 0.9
Neural Network Training Accuracy: 0.91
Random Forest Training Accuracy: 1.0
AdaBoost Training Accuracy: 0.97



In [ ]: