In [ ]:
# ライブラリのインポート

import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn import linear_model
import math
from sklearn.linear_model import LinearRegression

# Emit a short confirmation message once the statements above have run.
print("libraries imported")

In [ ]:
## Data to work with (x, y)
np.random.seed(0)  # fixed seed so the sampled points are reproducible

n_samples = 30
degrees = [1, 4, 15]


def true_fun(X):
    """Noise-free target curve: cos(1.5 * pi * X), evaluated element-wise."""
    return np.cos(1.5 * np.pi * X)


# The uniform inputs are drawn first and the Gaussian noise second, so the
# global random stream is consumed in the same order on every run.
x = np.random.rand(n_samples)
x.sort()
y = true_fun(x) + 0.1 * np.random.randn(n_samples)

plt.figure(figsize=(14, 5))

# Sanity-check the sample arrays before plotting.
print(x.shape)
print(y.shape)

# Evenly spaced grid of 100 points on [0, 1].
x_plot = np.linspace(0, 1, 100)

# create matrix versions of these arrays
# (column vectors: one row per sample, a single feature column)
X = x[:, np.newaxis]
X_plot = x_plot[:, np.newaxis]

colors = ['teal', 'yellowgreen', 'gold']
lw = 2
plt.scatter(x, y, color='navy', s=30, marker='o', label="training points")
plt.xlabel("x")
plt.ylabel("y")
plt.legend()  # without this call the label= given to scatter is never displayed

plt.show()

In [ ]:
# Try writing a fitting function by hand
def predict(X):
    """Apply predict_point to each element of X and return the results as a list."""
    return list(map(predict_point, X))


# Try to come up with a formula that fits the data without using math.sin(x)
def predict_point(x):
    """Hand-made prediction for a single input x (placeholder: always returns 1)."""
    return 1 ## TASK

# Start from a cleared figure, then overlay the hand-made curve on the
# training points.
plt.clf()
plt.scatter(x, y, color='navy', s=30, marker='o', label="training points")

y_plot = predict(X_plot)
plt.plot(x_plot, y_plot, color='blue', linewidth=lw, label="Handmade Prediction")
plt.xlabel("x")
plt.ylabel("y")
plt.legend()  # without this call the label= arguments are never displayed

plt.show()

In [ ]:
## Linear regression with a degree-1 (straight-line) model

reg = LinearRegression()
reg.fit(X, y)


## Prediction
# predict() needs a 2-D array of shape (n_samples, n_features); a flat [0]
# is rejected by scikit-learn, so the single sample is wrapped in its own row.
print(reg.predict([[0]]))

## Plot
plt.clf()
plt.scatter(x, y, color='navy', s=30, marker='o', label="training points")

y_plot = reg.predict(X_plot)
# This curve comes from the fitted model, so it is labelled as such rather
# than as a hand-made prediction.
plt.plot(x_plot, y_plot, color='blue', linewidth=lw, label="Linear regression")
plt.plot(x_plot, true_fun(x_plot), label="True function")
plt.xlabel("x")
plt.ylabel("y")
plt.legend()  # without this call the label= arguments are never displayed

plt.show()

In [ ]:
## Linear regression on degree-2 polynomial features

## Add a higher-order feature: PolynomialFeatures(2) supplies the x*x term
## to the linear model.
reg = make_pipeline(PolynomialFeatures(2), LinearRegression())
reg.fit(X, y)

## Prediction
# predict() needs a 2-D array of shape (n_samples, n_features); a flat [0]
# is rejected by scikit-learn, so the single sample is wrapped in its own row.
print(reg.predict([[0]]))

## Plot
plt.clf()
plt.scatter(x, y, color='navy', s=30, marker='o', label="training points")

y_plot = reg.predict(X_plot)
# This curve comes from the fitted pipeline, so it is labelled as such rather
# than as a hand-made prediction.
plt.plot(x_plot, y_plot, color='blue', linewidth=lw, label="Polynomial fit (degree 2)")
plt.plot(x_plot, true_fun(x_plot), label="True function")
plt.xlabel("x")
plt.ylabel("y")
plt.legend()  # without this call the label= arguments are never displayed

plt.show()

In [ ]:
## Polynomial regression for degrees 2 through 14
# create matrix versions of these arrays
X = x[:, np.newaxis]

for degree in range(2, 15):
    ## Add higher-order polynomial features.
    reg = make_pipeline(PolynomialFeatures(degree), LinearRegression())
    reg.fit(X, y)

    # The score is computed on the same (X, y) used for fitting, so it
    # reflects training fit only, not generalization.
    print("score Y=f(x, degree={0})=".format(degree), reg.score(X, y))

    ## Plot
    plt.clf()
    plt.scatter(x, y, color='navy', s=30, marker='o', label="training points")

    y_plot = reg.predict(X_plot)
    # Label the curve with its degree so each figure in the sweep can be
    # identified on its own.
    plt.plot(x_plot, y_plot, color='blue', linewidth=lw,
             label="Polynomial fit (degree {0})".format(degree))
    plt.plot(x_plot, true_fun(x_plot), label="True function")
    # Fixed axis limits keep the figures comparable across degrees.
    plt.xlim((0, 1))
    plt.ylim((-2, 2))
    plt.xlabel("x")
    plt.ylabel("y")
    plt.legend()  # without this call the label= arguments are never displayed

    plt.show()