In [2]:
import matplotlib.pyplot as plt
import numpy as np

%matplotlib inline

In [148]:
# create a sample data set for regression
X = (np.random.uniform(0, 1, size=100)).reshape(100,1)
y = 2 + 3*X + np.random.normal(0,0.25, size=100).reshape(100,1)

print X.shape
y.shape


(100, 1)
Out[148]:
(100, 1)

In [154]:
plt.plot(X, y, "k.")


Out[154]:
[<matplotlib.lines.Line2D at 0x7f04c901c310>]

In [150]:
from sklearn.neighbors import KNeighborsRegressor
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import SGDRegressor

knn3 = KNeighborsRegressor(n_neighbors=3).fit(X, y)
knn15 = KNeighborsRegressor(n_neighbors=15).fit(X, y)
lr = LinearRegression().fit(X, y)
sgd = SGDRegressor(loss='squared_loss', penalty='none', learning_rate='constant', eta0=0.1, n_iter=200).fit(X, y)

In [151]:
plt.figure(figsize=(12,9))
plt.plot(X, y, "k.")
knn_x = np.linspace(0, 1, num=80).reshape(80,1)
line_x = np.array([[0], [1]])
plt.plot(knn_x, knn3.predict(knn_x), label='3NN')
plt.plot(knn_x, knn15.predict(knn_x), label='15NN')
plt.plot(line_x, lr.predict(line_x), label='LR')
plt.plot(line_x, sgd.predict(line_x), "--", label='SGD')
plt.legend()


Out[151]:
<matplotlib.legend.Legend at 0x7f04c9251a50>

In [162]:
from sklearn.datasets import make_classification

X, y = make_classification(n_samples=100, n_features=2, n_informative=2, n_redundant=0, n_classes=2, 
                           n_clusters_per_class=2)

print X.shape
y.shape


(100, 2)
Out[162]:
(100,)

In [163]:
plt.scatter(X[:, 0], X[:, 1], c=y)


Out[163]:
<matplotlib.collections.PathCollection at 0x7f04ca763ad0>

In [167]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import AdaBoostClassifier

from sklearn.metrics import accuracy_score

knn = KNeighborsClassifier(n_neighbors=5).fit(X, y)
lrc = LogisticRegression().fit(X, y)
svm = SVC().fit(X, y)
net = MLPClassifier(hidden_layer_sizes=(15, 10), activation='logistic', learning_rate_init=0.1).fit(X, y)
rf = RandomForestClassifier(n_estimators=20).fit(X, y)
ab = AdaBoostClassifier(n_estimators=20).fit(X, y)

In [168]:
print "KNN Training Accuracy: {}".format(accuracy_score(y, knn.predict(X)))
print "Logistic Regression Training Accuracy: {}".format(accuracy_score(y, lrc.predict(X)))
print "SVM Training Accuracy: {}".format(accuracy_score(y, svm.predict(X)))
print "Neural Network Training Accuracy: {}".format(accuracy_score(y, net.predict(X)))
print "Random Forest Training Accuracy: {}".format(accuracy_score(y, rf.predict(X)))
print "AdaBoost Training Accuracy: {}".format(accuracy_score(y, ab.predict(X)))


KNN Training Accuracy: 0.9
Logistic Regression Training Accuracy: 0.89
SVM Training Accuracy: 0.9
Neural Network Training Accuracy: 0.91
Random Forest Training Accuracy: 1.0
AdaBoost Training Accuracy: 0.97

In [ ]: