In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import sys
sys.path.append('..')
from helper import linear_regression as lr  # my own modules
from helper import general
In [2]:
data = pd.read_csv('ex1data1.txt', names=['population', 'profit'])
data.head()
Out[2]:
In [3]:
X = general.get_X(data)
print(X.shape, type(X))
y = general.get_y(data)
print(y.shape, type(y))
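get_X and get_y come from the helper package, so their code is not shown here. A minimal sketch of what they are assumed to do — prepend a column of ones as the intercept term and pull the last column out as the target — looks like this (the helper's actual implementation may differ):

def get_X(df):
    # assumed behaviour: features with a leading column of ones (intercept term)
    ones = pd.DataFrame({'ones': np.ones(len(df))})
    return pd.concat([ones, df.iloc[:, :-1]], axis=1).values

def get_y(df):
    # assumed behaviour: last column of the dataframe as the target vector
    return np.array(df.iloc[:, -1])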
In [4]:
theta = np.zeros(X.shape[1])
In [5]:
lr.cost(theta, X, y)
Out[5]:
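lr.cost is assumed to be the standard squared-error cost for linear regression, J(theta) = 1/(2m) * sum((X theta - y)^2). A minimal NumPy sketch consistent with the call above:

def cost(theta, X, y):
    # squared-error cost: J(theta) = 1/(2m) * sum((X @ theta - y)^2)
    m = X.shape[0]
    error = X @ theta - y
    return error @ error / (2 * m)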
In [6]:
epoch = 500
final_theta, cost_data = lr.batch_gradient_decent(theta, X, y, epoch)
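lr.batch_gradient_decent (the helper keeps the "decent" spelling) returns the fitted parameters together with the cost recorded at every iteration, which is why cost_data is plotted against epoch + 1 points below. A sketch of batch gradient descent consistent with that interface, reusing the cost sketch above and assuming a default learning rate alpha:

def batch_gradient_decent(theta, X, y, epoch, alpha=0.01):
    # record the cost before the first update, then once per epoch
    cost_data = [cost(theta, X, y)]
    m = X.shape[0]
    for _ in range(epoch):
        # gradient of the squared-error cost: (1/m) * X^T (X theta - y)
        theta = theta - alpha / m * (X.T @ (X @ theta - y))
        cost_data.append(cost(theta, X, y))
    return theta, np.array(cost_data)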
In [7]:
# compute final cost
lr.cost(final_theta, X, y)
Out[7]:
In [8]:
# sns.tsplot was removed in newer seaborn versions; lineplot gives the same cost-vs-epoch curve
ax = sns.lineplot(x=np.arange(epoch + 1), y=cost_data)
ax.set_xlabel('epoch')
ax.set_ylabel('cost')
Out[8]:
In [9]:
b = final_theta[0] # intercept
m = final_theta[1] # slope
plt.scatter(data.population, data.profit, label="Training data")
plt.plot(data.population, data.population*m + b, label="Prediction")
plt.legend(loc=2)
Out[9]:
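With the fitted parameters, a prediction for a new city is just m * population + b. Assuming the dataset's units (population in 10,000s, profit in $10,000s), a quick check for a city of 70,000 people:

# predicted profit (in $10,000s) for a city of 70,000 people, i.e. x = 7
print(m * 7 + b)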