In [1]:
import numpy as np
import pandas as pd
%pylab inline


Populating the interactive namespace from numpy and matplotlib

In [166]:
# Seed the global NumPy RNG so that Restart & Run All reproduces the same data
# (and the same random initial parameters drawn later from np.random).
SEED = 42
N_SAMPLES = 10000
np.random.seed(SEED)

# Single feature drawn from N(0, 10^2), stored as an (N_SAMPLES, 1) column.
X = np.random.normal(scale=10, size=(N_SAMPLES, 1))
# Targets follow the line y = 2*x + 5 plus Gaussian noise with standard deviation 100.
y = X*2+5 + np.random.normal(scale=100, size=(N_SAMPLES, 1))

In [172]:
class LinearRegression:
    """Single-feature linear regression ``y ~ k*x + b`` fitted by batch gradient descent.

    Parameters
    ----------
    lr : float
        Learning rate (step size multiplier applied to the gradient).
    max_iter : int
        Upper bound on the number of gradient-descent iterations.
    tol : float
        Convergence tolerance: fitting stops once the length of the parameter
        update (``lr * ||gradient||``) is no longer greater than ``tol``.

    Attributes
    ----------
    k : float
        Slope. Initialised with ``np.random.random()``.
    b : float
        Intercept. Initialised with ``np.random.random()``.
    n_iter_ : int
        Number of iterations performed by the most recent ``fit`` call
        (0 before any fit). Equal to ``max_iter`` when the cap was reached.
    """

    def __init__(self, lr = 0.01, max_iter = 1000000, tol = 1e-4):
        self.k = np.random.random()
        self.b = np.random.random()
        self.lr = lr
        self.max_iter = max_iter
        self.tol = tol
        self.n_iter_ = 0

    def predict(self, x):
        """Return ``x*k + b``; the result has the same shape as ``x``."""
        return x*self.k+self.b

    def fit(self, data, answers):
        """Fit ``k`` and ``b`` by minimising the mean squared error.

        Parameters
        ----------
        data : array-like
            Feature values, either flat ``(N,)`` or a column ``(N, 1)``.
        answers : array-like
            Target values with the same number of elements as ``data``.

        Raises
        ------
        ValueError
            If ``data`` is empty or the two inputs differ in element count.

        Notes
        -----
        Starts from the current ``k`` and ``b`` and updates them in place.
        The loop also ends if the step becomes NaN, because the comparison
        ``step > tol`` is then False.
        """
        # Flatten both inputs so that a column X paired with a flat y cannot
        # broadcast the residual into an (N, N) matrix.
        x = np.asarray(data, dtype=float).ravel()
        t = np.asarray(answers, dtype=float).ravel()
        n = x.size
        if n == 0:
            raise ValueError("data must contain at least one sample")
        if t.size != n:
            raise ValueError(
                "data and answers must have the same number of elements "
                "(got %d and %d)" % (n, t.size)
            )

        step = 1  # sentinel so that the first iteration always runs
        it = 0
        while step > self.tol and it < self.max_iter:
            delta = self.k * x + self.b - t      # residuals, shape (n,)
            dk = 2.0 / n * x.dot(delta)          # d(MSE)/dk
            db = 2.0 / n * delta.sum()           # d(MSE)/db
            self.k = self.k - self.lr * dk
            self.b = self.b - self.lr * db
            # Length of the update just applied to (k, b).
            step = np.hypot(dk, db) * self.lr
            it += 1
        self.n_iter_ = it

In [173]:
# Use a smaller learning rate than the class default of 0.01.
lr = LinearRegression(lr=0.001)

In [174]:
# Run gradient descent; updates lr.k and lr.b in place.
lr.fit(data=X, answers=y)

In [175]:
# Line used to generate the data (slope 2, intercept 5).
plot(X, X*2 + 5)
# Line recovered by gradient descent.
plot(X, lr.predict(X))
# Noisy observations.
scatter(X, y)


Out[175]:
<matplotlib.collections.PathCollection at 0x15eaa41bcc0>

In [176]:
# Fitted slope and intercept (the data were generated with slope 2, intercept 5).
(lr.k, lr.b)


Out[176]:
(1.7631568836444196, 5.566980541275703)

In [ ]: