In [2]:
import pandas as pd
import numpy as np
from matplotlib import pylab as plt
%matplotlib inline
# Load the semicolon-separated car data set and display its last five rows.
df = pd.read_csv("data/cars.csv", sep=";")
df.tail(5)
Out[2]:
In [3]:
def prepareX(X, order=1):
    """Build a polynomial design matrix from ``X``.

    The element-wise powers ``X**0, X**1, ..., X**order`` are computed and
    stacked horizontally with ``np.hstack``, lowest power first. For a
    single-column input this gives one column per power, the first column
    being all ones.
    """
    powers = []
    for exponent in range(order + 1):
        powers.append(np.power(X, exponent))
    return np.hstack(powers)
In [4]:
def linreq_np(X, Y, order):
    """Fit least-squares weights with ``numpy.linalg.lstsq``.

    Parameters
    ----------
    X : design matrix, one row per sample.
    Y : target values, one row per sample.
    order : unused here; accepted so every trainer shares the
        ``trainer(X, Y, order)`` signature.

    Returns
    -------
    The least-squares solution ``w`` minimising ``||X w - Y||``.
    """
    # rcond=None explicitly selects the machine-precision based cutoff.
    # Omitting rcond triggers a FutureWarning on NumPy 1.14 - 1.x.
    weights = np.linalg.lstsq(X, Y, rcond=None)[0]
    return weights
def linreq_sysequation(X, Y, order):
    """Fit least-squares weights by solving the normal equations.

    Solves ``(X^T X) w = X^T Y`` for ``w``.

    Parameters
    ----------
    X : design matrix, one row per sample (``np.matrix`` or 2-D ndarray).
    Y : target values, one row per sample.
    order : unused here; accepted so every trainer shares the
        ``trainer(X, Y, order)`` signature.

    Returns
    -------
    The weight column ``w``.

    Raises
    ------
    numpy.linalg.LinAlgError
        If ``X^T X`` is singular.
    """
    # np.dot is a true matrix product for both np.matrix and ndarray inputs,
    # whereas ``*`` is element-wise on ndarrays.
    gram = np.dot(X.T, X)
    moment = np.dot(X.T, Y)
    # Solving the linear system avoids forming an explicit inverse, which is
    # less accurate for ill-conditioned polynomial design matrices.
    return np.linalg.solve(gram, moment)
def linreq_gradient(X, Y, order, eta=0.0002, maxIteration=50000):
    """Fit least-squares weights with batch gradient descent.

    Parameters
    ----------
    X : design matrix of shape (N, D) (``np.matrix`` or 2-D ndarray).
    Y : target column of shape (N, 1).
    order : unused here; accepted so every trainer shares the
        ``trainer(X, Y, order)`` signature.
    eta : step size applied to the gradient (default 0.0002).
    maxIteration : upper bound on the number of update steps
        (default 50000).

    Returns
    -------
    The weight column ``theta`` of shape (D, 1).
    """
    N = X.shape[0]
    D = X.shape[1]
    theta = np.zeros((D, 1))
    prev_error = np.inf
    for _ in range(maxIteration):
        # Use the Y argument (not a notebook-level global) and compute the
        # residual once; it feeds both the error and the gradient.
        residual = np.dot(X, theta) - Y
        error = np.power(residual, 2).sum()
        gradient = np.dot(X.T, residual) / N
        theta = theta - eta * gradient
        # Stop once the squared error improves by less than 0.001 % relative
        # to the previous iteration.
        if prev_error * 0.99999 < error:
            break
        prev_error = error
    return theta
In [84]:
def predict(X, model):
    """Print the model's prediction for every row of ``X``.

    Each row obtained by iterating over ``X`` is multiplied by ``model``
    with ``*`` and the row/result pair is printed. Nothing is returned.
    """
    template = "prediction of {} is {}"
    for row in X:
        print(template.format(row, row * model))
def test(X, Y, trainer, **kwargs):
    """Fit a polynomial model, plot it over the data and print predictions.

    Parameters
    ----------
    X : single-column matrix of raw inputs; expanded with ``prepareX``.
    Y : column of target values.
    trainer : callable invoked as ``trainer(X, Y, order)`` that returns the
        weight column.
    **kwargs :
        order (int, default 1) -- polynomial degree; must be at least 1
            because column 1 of the design matrix is used as the x axis.
        title (str, default "") -- plot title.
        predictX (optional) -- single-column matrix of raw inputs to print
            predictions for; no predictions are printed when omitted.

    Returns
    -------
    The weights returned by ``trainer``.
    """
    order = kwargs.get("order", 1)
    X = prepareX(X, order)
    model = trainer(X, Y, order)

    # plot original data
    plt.plot(X[:, 1], Y, "rx")

    # plot model: sample from 10 below the first x value to 20 above the
    # last one. Scalar indexing avoids calling int() on a 1x1 matrix.
    first = X[0, 1]
    last = X[-1, 1]
    xTest = np.linspace(
        int(first - 10),
        int(last + 20),
        int(last - first) * 2)
    xTest = prepareX(np.matrix(xTest).T, order)
    plt.title(kwargs.get("title", ""))
    plt.plot(xTest[:, 1], xTest * model, "b-")
    plt.grid(True)

    # Use the caller-supplied points instead of a notebook-level global.
    predictX = kwargs.get("predictX")
    if predictX is not None:
        predict(prepareX(predictX, order), model)
    return model
# Column matrices for the regression. Years are shifted so that 1965 maps to
# 0 and the Car column is scaled down by a factor of one million.
# np.asmatrix is used because the np.mat alias was removed in NumPy 2.0.
x = np.asmatrix(df.Year).T - 1965
y = np.asmatrix(df.Car).T / 1000000.
# Years to predict for, shifted the same way as x.
predictX = np.asmatrix([2012, 2025, 2050]).T - 1965
In [83]:
# Degree-1 fit using the numpy.linalg.lstsq based trainer.
test(
    x,
    y,
    linreq_np,
    predictX=predictX,
    title="via numpy.linalg.lstsqr",
)
Out[83]:
In [78]:
# Degree-1 fit using the normal-equation trainer.
test(
    x,
    y,
    linreq_sysequation,
    predictX=predictX,
    title="solving sys of eq.",
)
Out[78]:
In [79]:
# Degree-1 fit using the gradient-descent trainer.
test(
    x,
    y,
    linreq_gradient,
    predictX=predictX,
    title="gradient",
)
Out[79]:
In [80]:
# Degree-2 fit using the normal-equation trainer.
test(
    x,
    y,
    linreq_sysequation,
    predictX=predictX,
    title="linreq_sysequation order 2",
    order=2,
)
Out[80]:
In [81]:
# Degree-3 fit using the normal-equation trainer; the title states the
# degree that is actually fitted.
test(x, y, linreq_sysequation, order=3, title="linreq_sysequation order 3", predictX=predictX)
Out[81]:
In [82]:
# Degree-2 fit using the numpy.linalg.lstsq based trainer; the title states
# the degree that is actually fitted.
test(x, y, linreq_np, order=2, title="numpy.linalg.lstsqr order 2", predictX=predictX)
Out[82]:
In [171]:
def findSlopeAt(X, Y, slopeAt, order):
    """Fit a polynomial of degree ``order`` and return its slope at ``slopeAt``.

    The model is fitted, plotted and its predictions printed through
    ``test`` with the ``linreq_np`` trainer, passing the notebook-level
    ``predictX`` along. The fitted polynomial is then differentiated once
    and the derivative is evaluated at ``slopeAt``.

    Parameters
    ----------
    X : single-column matrix of raw inputs.
    Y : column of target values.
    slopeAt : x position, on the same scale as ``X``, at which the slope is
        evaluated.
    order : degree of the fitted polynomial.

    Returns
    -------
    The value of the first derivative of the fitted polynomial at
    ``slopeAt``.
    """
    model = test(
        X, Y, linreq_np, order=order,
        title="numpy.linalg.lstsqr order {}".format(order), predictX=predictX)
    # The model stores coefficients lowest power first (the column order
    # produced by prepareX); np.polyder / np.polyval expect highest first.
    coefficients = np.asarray(model).ravel()[::-1]
    # The slope is always the first derivative, whatever the fitted degree.
    derivative = np.polyder(coefficients, 1)
    return np.polyval(derivative, slopeAt)
# Slope of the degree-3 fit at x = 60 on the shifted year axis.
findSlopeAt(x, y, slopeAt=60, order=3)
In [ ]: