notebook.community



In [5]:

    
#import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
%matplotlib inline



In [6]:

    
# Load the two-column, header-less CSV. A relative path is resolved against
# the current working directory, which is exactly what the previous
# os.getcwd() concatenation produced, but without needing the `os` module
# (its import is commented out in the imports cell, so the old line raised
# NameError on a fresh kernel).
path = 'data.txt'
data = pd.read_csv(path, header=None, names=['X', 'Y'])
# show only the first rows so the output stays small for larger files
data.head()









    Out[6]:






  
    
      
      X
      Y
    
  
  
    
      0
       0
        1
    
    
      1
       1
        3
    
    
      2
       2
        7
    
    
      3
       3
       13
    
    
      4
       4
       21
    
  

5 rows × 2 columns



In [7]:

    
# quick look at the raw points before fitting anything
data.plot.scatter(x='X', y='Y', figsize=(6, 4))









    Out[7]:





<matplotlib.axes.AxesSubplot at 0x7f13d9787f28>






    



/usr/lib/python3/dist-packages/matplotlib/collections.py:549: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
  if self._edgecolors == 'face':

Implementing 1-dimensional linear regression



In [8]:

    
def computeCost(X, y, theta):
    """Return the halved mean squared error of the linear model X . theta^T.

    The value computed is ``sum((X . theta^T - y) ** 2) / (2 * m)`` with
    ``m = len(X)``.

    Parameters
    ----------
    X : np.matrix or 2-D np.ndarray, shape (m, n)
        One row per sample, one column per parameter.
    y : np.matrix or 2-D np.ndarray, shape (m, 1)
        Target column.
    theta : np.matrix or 2-D np.ndarray, shape (1, n)
        Parameter row vector.

    Returns
    -------
    scalar
        The cost for the given parameters.
    """
    # np.dot is a true matrix product for np.matrix AND 2-D ndarray inputs,
    # whereas `*` is only a matrix product for np.matrix.
    residuals = np.dot(X, theta.T) - y
    return np.sum(np.power(residuals, 2)) / (2 * len(X))



In [9]:

    
# Prepend a column of ones to the data set so that the first parameter
# multiplies a constant, i.e. acts as the intercept.
# Guarded: DataFrame.insert raises ValueError if the column already exists,
# which made this cell fail when it was run a second time.
if 'Ones' not in data.columns:
    data.insert(0, 'Ones', 1)

# set X (training data: every column but the last) and
# y (target variable: the last column, kept two-dimensional)
cols = data.shape[1]
X = data.iloc[:, 0:cols-1]
y = data.iloc[:, cols-1:cols]



In [10]:

    
# convert from data frames to numpy matrices
# np.asarray accepts a DataFrame as well as an existing matrix, so this cell
# can be re-run (the previous `.values` access failed once X was a matrix).
X = np.matrix(np.asarray(X))
y = np.matrix(np.asarray(y))
# start from all-zero float parameters, one per column of X
theta = np.matrix(np.zeros((1, X.shape[1])))



In [11]:

    
# sanity check: shapes of the design matrix, the parameters and the target
tuple(m.shape for m in (X, theta, y))









    Out[11]:





((5, 2), (1, 2), (5, 1))



In [12]:

    
# cost of the starting parameters, before any training
computeCost(X=X, y=y, theta=theta)









    Out[12]:





66.900000000000006



In [13]:

    
def gradientDescent(X, y, theta, alpha, iters):
    """Run batch gradient descent for linear regression and record its history.

    Parameters
    ----------
    X : np.matrix or 2-D np.ndarray, shape (m, n)
        One row per sample, one column per parameter.
    y : np.matrix or 2-D np.ndarray, shape (m, 1)
        Target column.
    theta : np.matrix or np.ndarray, shape (1, n)
        Starting parameters. Never modified; a float copy is used internally.
    alpha : float
        Learning rate.
    iters : int
        Number of update steps.

    Returns
    -------
    Wtheta : np.ndarray, shape (iters, n)
        Parameters after each step (row i holds the result of step i).
    cost : np.ndarray, shape (iters,)
        Value of computeCost after each step.
    """
    # Matrix views make `*` a matrix product regardless of the input type;
    # the float copy of theta keeps the caller's object untouched.
    X = np.asmatrix(X)
    y = np.asmatrix(y)
    theta = np.matrix(theta, dtype=float)

    samples = len(X)
    parameters = theta.shape[1]
    cost = np.zeros(iters)
    # one history column per parameter (previously hard-coded to two)
    Wtheta = np.zeros(shape=(iters, parameters))

    for i in range(iters):
        error = (X * theta.T) - y
        # error.T * X is the (1, n) row whose j-th entry is
        # sum(error * X[:, j]); all parameters are updated simultaneously
        # from the same error vector.
        theta = theta - (alpha / samples) * (error.T * X)
        cost[i] = computeCost(X, y, theta)
        Wtheta[i, :] = theta.A1
    return Wtheta, cost



In [14]:

    
# learning rate and number of gradient-descent steps
alpha, iters = 0.01, 1000

# run gradient descent to "fit" the model parameters;
# W holds the parameters after every step, cost the matching cost values
W, cost = gradientDescent(X, y, theta, alpha, iters)



In [15]:

    
# ten evenly spaced points spanning the observed range of X, used to draw the fitted lines
x = np.linspace(data['X'].min(), data['X'].max(), num=10)



In [16]:

    
fig, ax = plt.subplots(figsize=(10, 10))
# one blue line per recorded step shows how the fit evolves during training;
# iterating over W itself keeps the plot correct even if `iters` is changed
# after training
for step_params in W:
    ax.plot(x, step_params[0] + (step_params[1] * x), 'b')
# the last recorded parameters, drawn in red on top of the history
final_params = W[-1]
ax.plot(x, final_params[0] + (final_params[1] * x), 'r', label='Final fit')
# zorder lifts the points above the lines, which would otherwise cover them
ax.scatter(data.X, data.Y, label='Training Data', zorder=3)
ax.legend(loc=2)
ax.set_xlabel('X')
ax.set_ylabel('Y')
ax.set_title('Predicted Y vs. X Size')









    Out[16]:





<matplotlib.text.Text at 0x7f13accd07f0>






    



/usr/lib/python3/dist-packages/matplotlib/collections.py:549: FutureWarning: elementwise comparison failed; returning scalar instead, but in the future will perform elementwise comparison
  if self._edgecolors == 'face':



In [17]:

    
# training curve: cost recorded after each gradient-descent step
fig, ax = plt.subplots(figsize=(12, 8))
ax.plot(np.arange(iters), cost, 'r')
ax.set(xlabel='Iterations', ylabel='Cost')
ax.set_title('Error vs. Training Epoch')









    Out[17]:





<matplotlib.text.Text at 0x7f13abcb1c50>



In [ ]: