In [1]:
import numpy as np
from scipy import optimize
import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
matplotlib.style.use('ggplot')
%matplotlib inline
from sklearn.preprocessing import PolynomialFeatures
import csv
import time
%load_ext autoreload
%autoreload 2
In [2]:
file_path_1 = '../course_materials/ex2data1.txt'
In [3]:
dF_1 = pd.read_csv(file_path_1, header = None, names = ['x1', 'x2', 'y'])
n_samples_1, n_variables_1 = dF_1.shape
print (f"\tn_samples # {n_samples_1}\n\tvariables # {n_variables_1}")
dF_1.head()
Out[3]:
In [4]:
x1 = dF_1['x1']
x2 = dF_1['x2']
plt.figure(figsize=(8,6))
plt.scatter(x1, x2, c = dF_1['y'])
plt.xlabel("X1")
plt.ylabel("X2")
plt.show()
In [5]:
def get_data(file_path):
    # try with matrices as well
    data = np.loadtxt(file_path, delimiter = ',')
    n_samples, n_variables = data.shape
    # prepend a column of ones for the intercept term
    X = np.insert(data[:, :-1], 0, 1, axis=1)
    y = data[:, -1:]
    # β = np.matrix(np.zeros(n_variables)).T
    β = np.zeros(n_variables)
    return β, X.flatten(), y.flatten(), n_samples, n_variables
In [6]:
def get_sigmoid(z):
    return 1/(1 + np.exp(-z))

def get_hypothesis(β, X, n_samples, n_variables):
    β = β.reshape(n_variables, -1)
    X = X.reshape(n_samples, -1)
    return get_sigmoid(np.dot(X, β))

# def cost_function(X, y, β):
def cost_function(β, X, y, n_samples, n_variables, λ=0.):
#     β = β.reshape(n_variables, -1)
#     X = X.reshape(n_samples, -1)
    y = y.reshape(n_samples, -1)
    # hypothesis vector h(n, 1)
    h = get_hypothesis(β, X, n_samples, n_variables)
    # cost scalar J(1, 1)
    J = (- np.dot(y.T, np.log(h)) - np.dot((1-y).T, np.log(1-h)))/n_samples
    # similarly, J can be calculated using np.multiply together with np.sum
    # J = -np.sum(np.multiply(y, np.log(h)) + np.multiply((1-y), np.log(1-h)))/n_samples
    # regularisation scalar (R); the intercept β[0] is not regularised
    R = λ*np.dot(β[1:].T, β[1:])/(2*n_samples)
    return (J + R)[0][0]

def optimise_β_1(β, X, y, n_samples, n_variables, λ=0.):
    return optimize.fmin(cost_function, x0=β, args=(X, y, n_samples, n_variables, λ), maxiter=1500, full_output=True)

def get_prediction(β, X, n_samples, n_variables):
    return get_hypothesis(β, X, n_samples, n_variables) >= 0.5
In [7]:
β_test_1, X_1, y_1, n_samples_1, n_variables_1 = get_data(file_path_1)
y_1.shape
Out[7]:
In [8]:
J = cost_function(β_test_1, X_1, y_1, n_samples_1, n_variables_1)
print(f"\tJ = {J:.5f}")
In [9]:
β_opt_1 = optimise_β_1(β_test_1, X_1, y_1, n_samples_1, n_variables_1)[0]
xTest_1 = np.array([1, 45, 85])
n_samplesTest_1 = 1
hypothesis = get_hypothesis(β_opt_1, xTest_1, n_samplesTest_1, n_variables_1)[0][0]
print(f"\tP = {hypothesis:.5f}")
The following note is courtesy of https://github.com/vsevolodloik.
The decision boundary is defined by
$\frac{1}{1 + e^{-X\cdot\beta}} = \frac{1}{2}$,
which holds exactly when $X\cdot\beta = 0$. Therefore, for the simple case of two variables, the equation of the decision boundary takes the following form:
$\beta_0+\beta_1\cdot{X_1}+\beta_2\cdot{X_2} = 0$
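Rearranging for $X_2$ gives the straight line that `goodness_of_fit` plots below:
$X_2 = -\frac{\beta_0}{\beta_2} - \frac{\beta_1}{\beta_2}\cdot{X_1}$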
The rate of type I errors (false positives) is denoted by $\alpha$.
The rate of type II errors (false negatives) is denoted by $\beta$.
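As an illustration (a minimal sketch, not part of the original exercise), both rates can be estimated empirically from the confusion-matrix counts of the fitted model, reusing β_opt_1, X_1 and y_1 from the cells above:
In [ ]:
# Sketch: empirical type I/II error rates from confusion-matrix counts
# (assumes β_opt_1, X_1, y_1, n_samples_1, n_variables_1 defined in the cells above).
p = get_prediction(β_opt_1, X_1, n_samples_1, n_variables_1).flatten()
tn = np.sum((y_1 == 0) & (p == 0))
fp = np.sum((y_1 == 0) & (p == 1))
fn = np.sum((y_1 == 1) & (p == 0))
tp = np.sum((y_1 == 1) & (p == 1))
α_empirical = fp/(fp + tn)    # type I error rate (false-positive rate)
β_empirical = fn/(fn + tp)    # type II error rate (false-negative rate)
print(f"\tα = {α_empirical:.3f}\n\tβ = {β_empirical:.3f}")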
In [10]:
def get_accuracy_precision_recall(X, y, p):
    # Elegant way to calculate TP, FP, and FN
    tp = np.sum(y * p)
    fp = np.sum(y - p == -1)
    fn = np.sum(y - p == 1)
    # y and p hold one entry per sample (X arrives flattened), so count samples with y
    accuracy = (y.shape[0] - fp - fn)/y.shape[0]
    precision = tp / (tp + fp)
    recall = tp / (tp + fn)
    print(f"\taccuracy {accuracy:.3f}\n\tprecision {precision:.3f}\n\trecall {recall:.3f}")
    return {
        'accuracy': accuracy,
        'precision': precision,
        'recall': recall}

def goodness_of_fit(β, X, y, n_samples, n_variables):
    β_R = β.reshape(n_variables, -1)
    X_R = X.reshape(n_samples, -1)
    y_R = y.reshape(n_samples, -1)
    p = get_prediction(β, X, n_samples, n_variables).flatten()
    get_accuracy_precision_recall(X, y, p)
    plt.figure(figsize=(8,6))
    x1 = X_R[:, 1:2]
    x2 = X_R[:, 2:]
    plt.scatter(x1, x2, c = y_R[:, 0])
    # decision-boundary line: X2 = -β0/β2 - X1*β1/β2
    x2_fit = - β_R[0]/β_R[2] - x1*β_R[1]/β_R[2]
    plt.plot(x1, x2_fit, '-')
    plt.xlabel("X1")
    plt.ylabel("X2")
    return plt.show()
In [11]:
goodness_of_fit(β_opt_1, X_1, y_1, n_samples_1, n_variables_1)
http://www.johnwittenauer.net/tag/machine-learning/
http://aimotion.blogspot.se/2011/11/machine-learning-with-python-logistic.html
https://beckernick.github.io/logistic-regression-from-scratch/
https://github.com/kaleko/CourseraML/blob/master/ex2/ex2.ipynb
http://www.scipy-lectures.org/advanced/mathematical_optimization/
In [12]:
file_path_2 = '../course_materials/ex2data2.txt'
dF_2 = pd.read_csv(file_path_2, header = None)
n_samples, n_variables = dF_2.shape
print (f"\tsamples # {n_samples}\n\tvariables # {n_variables}")
dF_2.head()
Out[12]:
In [13]:
X_1s = dF_2.iloc[:, :1]
X_2s = dF_2.iloc[:, 1:2]
plt.figure(figsize=(8,6))
plt.scatter(X_1s, X_2s, c = dF_2.iloc[:, 2])
plt.xlabel('X1')
plt.ylabel('X2')
plt.show()
Add polynomial and interaction features using scikit-learn preprocessing<br> http://scikit-learn.org/stable/modules/generated/sklearn.preprocessing.PolynomialFeatures.html
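For example (a quick sketch, not part of the original exercise), degree 2 applied to a single two-feature sample $[a, b]$ yields the columns $[1, a, b, a^2, ab, b^2]$, while degree 6, used below, expands two features into 28 columns:
In [ ]:
# Sketch: what PolynomialFeatures generates for a toy two-feature sample.
print(PolynomialFeatures(2).fit_transform(np.array([[2., 3.]])))
# → [[1. 2. 3. 4. 6. 9.]]
print(PolynomialFeatures(6).fit_transform(np.array([[2., 3.]])).shape)
# → (1, 28)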
In [14]:
def add_polynomial(file_path, polynomialDegree):
    data = np.loadtxt(file_path, delimiter = ',')
    n_samples, n_variables = data.shape
    X = data[:, :-1]
    y = data[:, -1:]
    poly = PolynomialFeatures(polynomialDegree)
    # X without intercept is passed to PolynomialFeatures.fit_transform.
    # Intercept is added automatically.
    poly_X = poly.fit_transform(X)
    n_samples, n_variables = poly_X.shape
    β = np.zeros((n_variables, 1))
    return β.flatten(), poly_X.flatten(), y.flatten(), n_samples, n_variables
In [15]:
βPoly6, XPoly6, yPoly6, n_samplesPoly6, n_variablesPoly6 = add_polynomial(file_path_2, 6)
In [16]:
J = cost_function(βPoly6, XPoly6, yPoly6, n_samplesPoly6, n_variablesPoly6)
print(f"J = {J:.5f}")
In [17]:
def optimise_β_2(β, X, y, n_samples, n_variables, λ=0.):
    # Number of iterations significantly affects the output of this function
    β_optimised = optimize.minimize(cost_function, β, args=(X, y, n_samples, n_variables, λ),
                                    method='BFGS', options={'maxiter': 200, "disp": False})
    # β_optimised = optimize.fmin_cg(cost_function, fprime=back_propagation, x0=β_flat,
    #                                args=(layer, X_flat, n_samples, y, yUnique),
    #                                maxiter=50, disp=True, full_output=True)
    return β_optimised['x']
In [18]:
# β_opt = optimise_β(βPoly6, XPoly6, yPoly6, λ = 0.)[0]
In [19]:
def decision_boundary(β, X, y, n_samples, n_variables, xMin, xMax, step, polyOrder, λ=0.):
    p = get_prediction(β, X, n_samples, n_variables).flatten()
    get_accuracy_precision_recall(X, y, p)
    xx = np.linspace(xMin[0], xMax[0], step)
    yy = np.linspace(xMin[1], xMax[1], step)
    XX, YY = np.meshgrid(xx, yy)
    XY = np.concatenate((XX.reshape(step**2, -1), YY.reshape(step**2, -1)), axis=1)
    # X without intercept is passed to PolynomialFeatures.fit_transform.
    # Intercept is added automatically.
    poly = PolynomialFeatures(polyOrder)
    poly_XY = poly.fit_transform(XY)
    ZZ = get_hypothesis(β, poly_XY, step**2, poly_XY.shape[1]).reshape(step, -1)
    # ZZ = get_prediction(β, poly_XY, step**2, poly_XY.shape[1]).reshape(step, -1)
    # For plt.contour, ZZ is a matrix;
    # xx(XX) and yy(YY) can be either matrices or vectors of appropriate dimensions.
    boundary = plt.contour(xx, yy, ZZ, [0.5])
    # label the 0.5 contour with the regularisation strength λ
    plt.clabel(boundary, inline=1, fontsize=15, fmt={0.5: r'$\lambda$ = %g' % λ})
    x_1 = X.reshape(n_samples, -1)[:, 1:2]
    x_2 = X.reshape(n_samples, -1)[:, 2:3]
    plt.scatter(x_1, x_2, c = y.reshape(n_samples, -1)[:, 0])
    plt.title("Decision Boundary")
    plt.xlabel("X1")
    plt.ylabel("X2")
    return plt.show()
In [20]:
λ = 0
polyOrder = 6
βPoly, XPoly, yPoly, sS_Poly, nV_Poly = add_polynomial(file_path_2, polyOrder)
β_opt_2 = optimise_β_2(βPoly, XPoly, yPoly, sS_Poly, nV_Poly, λ)
xMin = (-1., -1.)
xMax = (1.2, 1.2)
step = 50
decision_boundary(β_opt_2, XPoly, yPoly, sS_Poly, nV_Poly, xMin, xMax, step, polyOrder, λ)
In [21]:
for i, λ in enumerate([0., 1., 10, 100]):
    polyOrder = 6
    βPoly, XPoly, yPoly, n_samplesPoly, n_variablesPoly = add_polynomial(file_path_2, polyOrder)
    β_opt = optimise_β_2(βPoly, XPoly, yPoly, n_samplesPoly, n_variablesPoly, λ)
    xMin = (-1., -1.)
    xMax = (1.2, 1.2)
    step = 50
    decision_boundary(β_opt, XPoly, yPoly, n_samplesPoly, n_variablesPoly, xMin, xMax, step, polyOrder, λ)
In [ ]: