Overfitting


In [1]:
import numpy as np
from bokeh.io import push_notebook
from bokeh.models import ColumnDataSource
from bokeh.plotting import figure, show, output_notebook
from ipywidgets import interact

from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.pipeline import make_pipeline

In [2]:
# Configure Bokeh to render plots inline in the notebook (loads BokehJS).
output_notebook()


Loading BokehJS ...

In [3]:
def make_data(N=90, error=1.0, random_seed=None):
    """Sample a noisy 1-D regression dataset from ``y = 10 - 1 / (x + 0.1)``.

    Parameters
    ----------
    N : int
        Number of samples to draw; ``x`` is uniform on ``[0, 1)``.
    error : float
        Standard deviation of the Gaussian noise added to ``y``. A value
        ``<= 0`` returns the noiseless curve.
    random_seed : int or None
        Seed for the private random generator used by this call. The same
        seed always yields the same data; ``None`` lets NumPy pick a fresh,
        unpredictable seed.

    Returns
    -------
    X : ndarray of shape (N, 1)
        Feature matrix (single column) as expected by scikit-learn.
    y : ndarray of shape (N,)
        Noisy targets.
    """
    # A private generator honours ``random_seed`` and avoids reseeding
    # NumPy's global state as a hidden side effect of building the data.
    rng = np.random.RandomState(random_seed)

    def test_func(x, err=0.5):
        y = 10 - 1. / (x + 0.1)
        if err > 0:
            # Gaussian noise centred on the true curve.
            y = rng.normal(y, err)
        return y

    # randomly sample the data
    X = rng.random_sample(N)[:, np.newaxis]
    y = test_func(X.ravel(), error)

    return X, y

# Draw the training set once; every later cell fits and plots against this X, y.
X, y = make_data(random_seed=1)

In [4]:
def PolynomialRegression(degree=2, **kwargs):
    """Build a pipeline that fits a linear model on polynomial features.

    Parameters
    ----------
    degree : int
        Degree passed to ``PolynomialFeatures``.
    **kwargs
        Forwarded unchanged to ``LinearRegression``.

    Returns
    -------
    Pipeline
        ``PolynomialFeatures(degree)`` followed by ``LinearRegression(**kwargs)``.
    """
    feature_expansion = PolynomialFeatures(degree)
    linear_model = LinearRegression(**kwargs)
    return make_pipeline(feature_expansion, linear_model)

In [5]:
# Baseline model: a degree-2 polynomial fitted to the sampled data.
regr = PolynomialRegression(degree=2)
# Left as the last expression so the cell displays the fitted pipeline.
regr.fit(X, y)


Out[5]:
Pipeline(memory=None,
     steps=[('polynomialfeatures', PolynomialFeatures(degree=2, include_bias=True, interaction_only=False)), ('linearregression', LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1, normalize=False))])

In [6]:
# 100 evenly spaced points over the observed x-range. Kept as a (100, 1)
# column because the pipeline's predict() expects a 2-D feature matrix.
X_plot = np.linspace(X[:,0].min(), X[:,0].max(), 100)[:, np.newaxis]
y_plot = regr.predict(X_plot)

# Bokeh data-source columns should be 1-D sequences, so flatten the x grid
# here (the scatter call likewise plots X.ravel()). X_plot itself stays 2-D
# for later predict() calls.
source = ColumnDataSource(data=dict(x=X_plot.ravel(), y=y_plot))

In [7]:
# Comma-separated list of Bokeh tool names to attach to the figure's toolbar.
TOOLS="hover,crosshair,pan,wheel_zoom,zoom_in,zoom_out,box_zoom,undo,redo,reset,tap,save,box_select,poly_select,lasso_select,"

p = figure(tools=TOOLS)

# Raw samples as points; the fitted curve is drawn from `source` so that
# replacing the source's 'y' column later redraws the line.
p.scatter(X.ravel(), y)
p.line('x', 'y', color="#2222aa", alpha=0.5, line_width=2, source=source)


Out[7]:
GlyphRenderer(
id = '2f9ddc4a-384c-4366-89a1-e5eb6a8f726e', …)

In [8]:
def update(poly_degree):
    """Refit the model at ``poly_degree`` and refresh the plotted curve.

    Fits a new polynomial pipeline on the module-level ``X``/``y``, writes
    its predictions over ``X_plot`` into the ``'y'`` column of ``source``,
    and pushes the change to the notebook.
    """
    model = PolynomialRegression(degree=poly_degree).fit(X, y)
    source.data['y'] = model.predict(X_plot)
    push_notebook()

In [11]:
# notebook_handle=True makes show() return a notebook handle (displayed below);
# update() calls push_notebook() with no explicit handle, so it presumably
# targets the plot shown by this cell — run this cell before using the slider.
show(p, notebook_handle=True)


Out[11]:

<Bokeh Notebook handle for In[11]>

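Here you can change the degree of the polynomial used by the model. As the degree grows, the fitted curve follows the noise in the samples more and more closely, which is what overfitting looks like.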


In [10]:
# Bind update() to an integer widget for poly_degree: (min, max, step) = (0, 20, 1).
interact(update, poly_degree=(0,20,1))


Out[10]:
<function __main__.update>

In [ ]: