In [ ]:
# ライブラリのインポート
import numpy as np
import matplotlib.pyplot as plt
from sklearn.preprocessing import PolynomialFeatures
from sklearn.pipeline import make_pipeline
from sklearn import linear_model
import math
from sklearn.linear_model import LinearRegression
print("libraries imported")
In [ ]:
## Data to work with (x, y)
np.random.seed(0)  # fixed seed so the noisy samples are reproducible
n_samples = 30
degrees = [1, 4, 15]


def true_fun(X):
    """Return the noise-free target curve cos(1.5 * pi * X)."""
    return np.cos(1.5 * np.pi * X)


# Sorted random sample positions, and the target values with Gaussian noise
# (standard deviation 0.1) added on top of the true curve.
x = np.sort(np.random.rand(n_samples))
y = true_fun(x) + 0.1 * np.random.randn(n_samples)

plt.figure(figsize=(14, 5))
print(x.shape)
print(y.shape)

# Evenly spaced grid on [0, 1] used to draw curves.
x_plot = np.linspace(0.0, 1.0, num=100)

# Column-vector (n, 1) versions of the 1-D arrays above.
X = x.reshape(-1, 1)
X_plot = x_plot.reshape(-1, 1)

colors = ['teal', 'yellowgreen', 'gold']
lw = 2

plt.scatter(
    x,
    y,
    s=30,
    marker='o',
    color='navy',
    label="training points",
)
plt.show()
In [ ]:
# Try writing our own fitting function by hand
def predict(X):
    """Return the hand-made prediction for every sample in ``X``.

    ``X`` is any iterable of sample points; each element is passed
    unchanged to ``predict_point`` and the results are collected in a list.
    """
    return [predict_point(sample) for sample in X]


# Try to come up with a fitting formula without using math.sin(x)
def predict_point(x):
    """Return the predicted y value for a single point ``x``.

    Exercise placeholder: ``x`` is currently ignored and 1 is always
    returned.
    """
    return 1  ## TASK
# Draw the training samples and overlay the hand-made prediction curve.
plt.clf()  # start from an empty figure
plt.scatter(
    x,
    y,
    s=30,
    marker='o',
    color='navy',
    label="training points",
)
# Evaluate the hand-made predictor on the plotting grid.
y_plot = predict(X_plot)
plt.plot(
    x_plot,
    y_plot,
    linewidth=lw,
    color='blue',
    label="Handmade Prediction",
)
plt.show()
In [ ]:
## Linear regression on the single raw feature x (degree 1)
reg = LinearRegression()
reg.fit(X, y)

## Prediction
# predict() expects a 2-D array of shape (n_samples, n_features), so the
# single query point x = 0 is passed as [[0]]; a flat [0] is rejected with
# "Expected 2D array, got 1D array instead".
print(reg.predict([[0]]))

## Plot
plt.clf()
plt.scatter(x, y, color='navy', s=30, marker='o', label="training points")
y_plot = reg.predict(X_plot)
# This curve comes from the fitted LinearRegression model, not from the
# hand-made predictor, so it is labelled accordingly.
plt.plot(x_plot, y_plot, color='blue', linewidth=lw, label="Linear Regression")
plt.plot(x_plot, true_fun(x_plot), label="True function")
plt.show()
In [ ]:
## Degree-2 polynomial regression
## Add higher-order features: PolynomialFeatures(2) adds x*x (plus the bias
## column) before the linear fit.
reg = make_pipeline(PolynomialFeatures(2), LinearRegression())
reg.fit(X, y)

## Prediction
# The pipeline validates its input as a 2-D (n_samples, n_features) array,
# so the single query point x = 0 is passed as [[0]] rather than [0].
print(reg.predict([[0]]))

## Plot
plt.clf()
plt.scatter(x, y, color='navy', s=30, marker='o', label="training points")
y_plot = reg.predict(X_plot)
# This curve comes from the fitted pipeline, not from the hand-made
# predictor, so it is labelled accordingly.
plt.plot(x_plot, y_plot, color='blue', linewidth=lw,
         label="Degree-2 Polynomial Regression")
plt.plot(x_plot, true_fun(x_plot), label="True function")
plt.show()
In [ ]:
## Polynomial regression for degrees 2 through 14
# create matrix versions of these arrays
X = x[:, np.newaxis]

# NOTE(review): the exported source lost its indentation, so the extent of
# the loop body is an assumption: here every degree is fitted, scored AND
# plotted. If only the last degree was meant to be plotted, dedent the
# plotting section below.
for degree in range(2, 15):
    ## Add higher-order features (powers of x up to `degree`).
    reg = make_pipeline(PolynomialFeatures(degree), LinearRegression())
    reg.fit(X, y)
    # score() is evaluated on the same data the model was fitted on.
    print(f"score Y=f(x, degree={degree})=", reg.score(X, y))

    ## Plot
    plt.clf()
    plt.scatter(x, y, color='navy', s=30, marker='o', label="training points")
    y_plot = reg.predict(X_plot)
    # This curve comes from the fitted pipeline, not from the hand-made
    # predictor, so it is labelled accordingly.
    plt.plot(x_plot, y_plot, color='blue', linewidth=lw,
             label="Polynomial Regression")
    plt.plot(x_plot, true_fun(x_plot), label="True function")
    # Fixed axis limits keep the plots comparable across degrees.
    plt.xlim((0, 1))
    plt.ylim((-2, 2))
    plt.show()