In [1]:
import numpy as np

import pandas as pd
# from pandas.tools.plotting import scatter_matrix

import matplotlib
import matplotlib.pyplot as plt
matplotlib.style.use('ggplot')
%matplotlib inline

import time

%load_ext autoreload
%autoreload 2

1 Univariate Linear Regression


In [2]:
file_path_1 = '../course_materials/ex1data1.txt'

1.1 Data Visualisation


In [3]:
df = pd.read_csv(file_path_1, header = None)
n_samples, n_variables = df.shape
print(f"\tsample # {n_samples}\n\tvariables # {n_variables}")
df.head()


	sample # 97
	variables # 2
Out[3]:
0 1
0 6.1101 17.5920
1 5.5277 9.1302
2 8.5186 13.6620
3 7.0032 11.8540
4 5.8598 6.8233

In [4]:
Xs = df.iloc[:, :1]
Ys = df.iloc[:, 1:2]
plt.plot(Xs, Ys, 'o')
plt.xlabel("X")
plt.ylabel("Y")
plt.show()


1.2 Data Extraction and Transformation


In [5]:
def get_data(file_path):
    data = np.loadtxt(file_path, delimiter = ',')
    n_samples, n_variables = data.shape
#     matrix of variables X(n, k), where n is n_samples, and k is n_variables, including the intercept
    X = np.insert(data[:, :-1], 0, 1, axis=1)
#     vector of response y(n, 1), where n is n_samples
    y = data[:, -1:]
#     vector of coefficients β(k, 1), where k is n_variables, including the intercept
    β = np.zeros((n_variables,1))
    return β, X, y

In [7]:
β, X, y = get_data(file_path_1)

1.3 Linear Regression

1.3.1 Computing Parameters Analytically

Coefficient Matrix can be calculated as follows $$\beta = (X^T\cdot X)^{-1}\cdot X^T\cdot y$$

X is a matrix of variables with a first column of dummy variables as intercept (column of ones). y is a vector of responses. $\theta$ is a vector of coefficients in the linear regression model.


In [8]:
# Computing Parameters Analytically
def linear_regression_analytical(X, y):
#     β vector (2, 1), vector of coefficients
    β = np.dot(np.dot(np.linalg.inv(np.dot(X.T, X)), X.T), y)
    return β

1.3.2 Computing Parameters Using Gradient Descent & Linear Algebra

Vector Representation of the Regression Problem

Variables matrix X: $$ X = \begin{bmatrix} x_{1,1} & \ldots & x_{1,j} \\ \ldots & \ldots & \ldots \\ x_{n,1} & \ldots & x_{n,j}\\ \end{bmatrix} $$

Variables marix X, after adding a column of ones as intercepts: $$ X = \begin{bmatrix} 1 & x_{1,1} & \ldots & x_{1,k} \\ 1 & \ldots & \ldots & \ldots \\ 1 & x_{n,1}& \ldots & x_{n,k}\\ \end{bmatrix} $$

Response vector Y: $$ Y = \begin{bmatrix} y_1\\ \ldots\\ y_n\\ \end{bmatrix} $$

Coefficients vector $\beta$: $$ \beta = \begin{bmatrix} \beta_1\\ \ldots\\ \beta_k\\ \end{bmatrix} $$

Hypothesis $h_{\beta}(X) = X\cdot\beta$

Error $e = (h_{\beta}(X) - y)$

Cost Function $J = \frac{1}{2n}{\sum(h_{\beta} - y)^2}$

Gradient $\frac{\partial J}{\partial \beta} = \frac{1}{n}X^{T}\cdot e$<br>

In the code $\frac{\partial J}{\partial \beta}$ is denoted symply as g.


In [9]:
def get_hypothesis(β, X):
#     return hypothesis vector h(n, 1), where n is n_samples
    return np.dot(X, β)

def cost_function(β, X, y):
    n_samples, n_variables = X.shape
#     hypothesis vector h(n, 1)
    h = get_hypothesis(β, X)
#     cost scalar J(1, 1)
    J = np.sum((y-h)**2)/(2*n_samples)
#     similarly, cost J can be calculated using dot-product
#     J = np.dot((y-h).T, y-h)/(2*n_samples)
#     technically, the result is an array (1,1) rather than a float
    return J

def get_gradient_descent(β, X, y, α, iterations):
    n_samples, n_variables = X.shape
    J_history = []
    for i in range(iterations):
#         hypothesis vector h(n, 1)
        h = get_hypothesis(β, X)
#         error vector e(n, 1)
        e = h - y
#         cost scalar J
        J = cost_function(β, X, y)
#         gradient vector g(k, 1)
        g = np.dot(X.T, e)/(n_samples)
#         updated β vector β(k, 1)
        β = β - α*g
#         updated J_history
        J_history += [J] 
    return β, J_history

1.4 Results Visulisation & Analysis


In [10]:
def plot_regression(β, X, y):  
#     y_fit = β_fit[0][0] + β_fit[1][0]*X[:,1:]
    y_fit = np.dot(X,β_fit)
    
    MSE = np.sum((y - y_fit)**2)/y.shape[0]
    
    plt.plot(X[:,1:], y, 'o', X[:,1:], y_fit, '-')
    plt.xlabel("X")
    plt.ylabel("Y")
    print (
        f"\tβ_0 {β_fit[0][0]:.5f}\n",
        f"\tβ_1 {β_fit[1][0]:.5f}\n",
        f"\tY = {β_fit[0][0]:.2f} + {β_fit[1][0]:.2f} * X\n"
        f"\tMSE {MSE:.2}")
    return plt.show()

def plot_convergence(J_history, iterations):
    plt.plot(np.arange(1, iterations + 1), J_history, '-')
    plt.xlabel("iterations")
    plt.ylabel("J (cost)")
    plt.show()

1.4.1 Analytical Approach


In [11]:
β_fit = linear_regression_analytical(X, y)
plot_regression(β_fit, X, y)


	β_0 -3.89578
 	β_1 1.19303
 	Y = -3.90 + 1.19 * X
	MSE 9.0

1.4.2 Gradient-Descent Approach


In [13]:
# setting learning rate (α)
α = 0.01
iterations = 1000

β_fit = get_gradient_descent(β, X, y, α, iterations)[0]
plot_regression(β_fit, X, y)
costVector = get_gradient_descent(β, X, y, α, iterations)[1]
plot_convergence(costVector, iterations)


	β_0 -3.24140
 	β_1 1.12729
 	Y = -3.24 + 1.13 * X
	MSE 9.0

In [15]:
iterations = 1000
J = []
for α in [0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1]:
    β, J_history = get_gradient_descent(β, X, y, α, iterations)
    J += [J_history[-1]]
print (J)
plt.plot([0.000001, 0.00001, 0.0001, 0.001, 0.01, 0.1], J, 'o-')
plt.xlabel("α")
plt.ylabel("J")
plt.show()
# print (f"α = {α}",f"J = {J_history[-1]}")
# plotcost_functionVSiterations(J_history, iterations)


/Users/nikita/Documents/andrew-ng-2-python/.venv_andrew_ng_2_python/lib/python3.7/site-packages/numpy/core/fromnumeric.py:90: RuntimeWarning: overflow encountered in reduce
  return ufunc.reduce(obj, axis, dtype, out, **passkwargs)
/Users/nikita/Documents/andrew-ng-2-python/.venv_andrew_ng_2_python/lib/python3.7/site-packages/ipykernel_launcher.py:10: RuntimeWarning: overflow encountered in square
  # Remove the CWD from sys.path while we load stuff.
/Users/nikita/Documents/andrew-ng-2-python/.venv_andrew_ng_2_python/lib/python3.7/site-packages/ipykernel_launcher.py:29: RuntimeWarning: invalid value encountered in subtract
[28.10934410357117, 10.199564153888034, 5.859428582746157, 5.4412597614266875, 4.503182748795932, nan]

1.5.1 Data Extraction and Transformation Using Pandas


In [17]:
def data2df(file_path):
    df = pd.read_csv(
        file_path,
        header=None,
        names=['x1', 'y'])
    df.insert(0, 'x0', 1)
    nRows, nColumns = df.shape
    X = df[['x0','x1']]
    y = df[['y']]
    β = pd.DataFrame(np.zeros(X.shape[1]), columns = ['β'])
    return β, X, y

β_pd, X_pd, y_pd = data2df(file_path_1)

X_pd.head()
y_pd.head()
β_pd.head()
print(f"samples # {X_pd.shape[0]}")


samples # 97

In [18]:
plt.plot(X_pd['x1'], y_pd, 'o', ms = 5)
plt.xlabel('X')
plt.ylabel('Y')
plt.show()



In [19]:
def plot_regression_pd(β, X, y):
#     y_fit = β_fit[0][0] + β_fit[1][0]*X[:,1:]
    y_fit = np.dot(X,β_fit)
    MSE = np.sum((y - y_fit)**2)/y_pd.shape[0]

    plt.plot(X['x1'], y, 'o', X['x1'], y_fit, '-')
    plt.xlabel("X")
    plt.ylabel("Y")
    print (f"\tβ_0 {β_fit[0][0]:.5f}\n",
           f"\tβ_1 {β_fit[1][0]:.5f}\n",
           f"\tY = {β_fit[0][0]:.2f} + {β_fit[1][0]:.2f} * X\n"
           f"\tMSE {MSE[0]:.2f}"
          )
    return plt.show()

1.5.2 Analytical Approach Using Pandas


In [20]:
β_fit_pd = linear_regression_analytical(X_pd, y_pd)
plot_regression_pd(β_fit_pd, X_pd, y_pd)


	β_0 -3.24140
 	β_1 1.12729
 	Y = -3.24 + 1.13 * X
	MSE 9.03

1.5.3 Gradient-Descent Approach Using Pandas


In [22]:
α = 0.01
iterations = 1000

β_fit_pd = get_gradient_descent(β_pd, X_pd, y_pd, α, iterations)
plot_regression_pd(β_fit_pd[0], X_pd, y_pd)
# costVector = gradient_descent(β, X, y, α, iterations)[1]
# plot_convergence(costVector, iterations)


	β_0 -3.24140
 	β_1 1.12729
 	Y = -3.24 + 1.13 * X
	MSE 9.03

2 Multivariate Linear Regression


In [23]:
file_path = '../course_materials/ex1data2.txt'

2.1 Data Extraction and Transformation


In [25]:
df = pd.read_csv(file_path, header = None, names = ['x1', 'x2', 'y'])
df.insert(0, 'x0', 1)
nRows, nColumns = df.shape
X = df[['x0','x1', 'x2']]
y = df[['y']]
β_pd = pd.DataFrame(np.zeros(X.shape[1]), columns = ['β'])

df.head()
X.head()
y.head()
β_pd.head()
print(f"samples # {df.shape[0]}")


samples # 47

In [ ]: