In [ ]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns; sns.set();
scatter_args = dict(s=100, edgecolor='black', linewidth=1.5, cmap='autumn')
In [ ]:
def get_grid(data, step=0.1):
x_min, x_max = data.x.min() - 1, data.x.max() + 1
y_min, y_max = data.y.min() - 1, data.y.max() + 1
return np.meshgrid(np.arange(x_min, x_max, step),
np.arange(y_min, y_max, step))
from sklearn.model_selection import cross_val_score

def get_score(X, y, cl):
    # cross_val_score returns the negated MSE, so flip the sign back
    return -cross_val_score(cl, X, y, cv=5, scoring='neg_mean_squared_error').mean()
def plot_linear_border(cl, X, plot, borders=1):
x_limits = (np.min(X.x) - borders, np.max(X.x) + borders)
y_limits = (np.min(X.y) - borders, np.max(X.y) + borders)
line_x = np.linspace(*x_limits, num=2)
line_y = (-line_x * cl.coef_[0, 0] - cl.intercept_) / cl.coef_[0, 1]
plot.plot(line_x, line_y, c='r', lw=2)
plot.fill_between(line_x, line_y, -100, color='r')
plot.fill_between(line_x, line_y, 100, color='yellow')
plot.autoscale(tight=True)
plot.set_xlim(*x_limits)
plot.set_ylim(*y_limits)
def show_classifier(X, y, cl,
                    feature_modifier=lambda x: x,
                    proba=True,
                    print_score=False,
                    borders=1):
    fig, ax = plt.subplots(1, 1)
    # evaluation grid covering the feature space
    xs, ys = get_grid(X)
    xys = np.c_[np.ravel(xs), np.ravel(ys)]
    cl.fit(feature_modifier(X), y)
    if print_score:
        print("MSE = {}".format(get_score(feature_modifier(X), y, cl)))
    grid = feature_modifier(pd.DataFrame(xys, columns=('x', 'y')))
    # prediction over the grid (probability of class 1, or hard labels)
    if proba:
        predicted = cl.predict_proba(grid)[:, 1].reshape(xs.shape)
    else:
        predicted = cl.predict(grid).reshape(xs.shape)
    plot_linear_border(cl, X, ax, borders=borders)
    ax.scatter(X.x, X.y, c=y, **scatter_args)
    return cl
In [ ]:
n = 200
random = np.random.RandomState(17)
df1 = pd.DataFrame(data=random.multivariate_normal((0,0), [[1, 0.3], [0.3, 0.7]], n), columns=['x', 'y'])
df1['target'] = 0
df2 = pd.DataFrame(data=random.multivariate_normal((1,2), [[1, -0.5], [-0.5, 1.6]], n), columns=['x', 'y'])
df2['target'] = 1
data = pd.concat([df1, df2], ignore_index=True)
features = data[['x', 'y']]
data.plot(kind='scatter', x='x', y='y', c='target', colormap='autumn', alpha=0.75, colorbar=False);
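A quick sanity check of the generated sample (a minimal inspection sketch using standard pandas calls): both classes should contain n = 200 objects each.
In [ ]:
# class balance and a peek at the data
print(data.groupby('target').size())
data.head()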
In [ ]:
from sklearn.svm import LinearSVC
# unpack the grid once; xs and ys are reused by the online-learning cell below
xs, ys = get_grid(features, 0.1)
show_classifier(features, data.target,
LinearSVC(),
proba=False);
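The same helper can also report the 5-fold cross-validation score via get_score; a minimal sketch of that call (LinearSVC has no predict_proba, so proba stays False):
In [ ]:
# same linear SVM, but with the cross-validation score printed
show_classifier(features, data.target,
                LinearSVC(),
                proba=False,
                print_score=True);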
In [ ]:
from sklearn.linear_model import SGDClassifier
In [ ]:
random = np.random.RandomState(11)
n_iters = 20
plt.figure(figsize=(10, 8 * n_iters))
xys = np.c_[np.ravel(xs), np.ravel(ys)]
clf = SGDClassifier(alpha=1, l1_ratio=0)
# sample the objects that will arrive one at a time
train_objects = data.loc[random.choice(data.index, n_iters)]
for iteration in range(n_iters):
    new_object = train_objects.iloc[iteration]
    # update the model with a single new object
    clf = clf.partial_fit([new_object[['x', 'y']].values], [int(new_object.target)], classes=[0, 1])
    ax = plt.subplot(n_iters, 1, iteration + 1)
    plt.title("objects count = {}".format(iteration + 1))
    # current prediction over the grid
    predicted = clf.predict(xys).reshape(xs.shape)
    plot_linear_border(clf, features, ax)
    processed_objects = train_objects.head(iteration + 1)
    ax.scatter(processed_objects.x, processed_objects.y, c=processed_objects.target, alpha=0.5, **scatter_args)
    # highlight the object that has just arrived
    ax.scatter(new_object.x, new_object.y, marker='x', s=200, linewidth=3)
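For comparison, a minimal sketch of fitting the same SGDClassifier on the full sample at once rather than object by object; with the default hinge loss there is no predict_proba, so proba=False.
In [ ]:
# batch fit on all objects, for comparison with the online pictures above
show_classifier(features, data.target,
                SGDClassifier(alpha=1, l1_ratio=0),
                proba=False);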