In [1]:
import numpy as np
import scipy as sp
import scipy.optimize  # imported explicitly; sp.optimize.fmin_cg is used below
from scipy.io import loadmat
from scipy.stats import multivariate_normal
import matplotlib
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
from matplotlib.image import NonUniformImage
from matplotlib import cm
matplotlib.style.use('ggplot')
%matplotlib inline
%load_ext autoreload
%autoreload 2
In [2]:
anomalyfile_path = '../course_materials/ex8data1.mat'
anomalyData = loadmat(anomalyfile_path)
print(anomalyData.keys())
print(anomalyData['X'].shape)
print(anomalyData['Xval'].shape)
print(anomalyData['yval'].shape)
In [3]:
anomalyX = anomalyData['X']
plt.plot(anomalyX[:, 0], anomalyX[:, 1], 'o')
plt.axis('equal')
plt.show()
The univariate Gaussian density is
$p(x; \mu, \sigma^2) = \frac{1}{\sqrt{2\pi}\,\sigma}\exp\Big(-\frac{(x-\mu)^2}{2\sigma^2}\Big)$, where
$\mu$ is the mean and $\sigma^2$ is the variance ($\sigma$ itself is the standard deviation).
Its multivariate generalisation is
$p(x; \mu, \Sigma) = \frac{1}{(2\pi)^{n/2}\,|\Sigma|^{1/2}}\exp\Big(-\frac{1}{2}(x-\mu)^T\Sigma^{-1}(x-\mu)\Big)$, where
$\Sigma$ is the $n\times n$ covariance matrix and
$|\Sigma|$ is the determinant of $\Sigma$.
In [4]:
def gaussianParameter(X):
    return np.mean(X, axis=0), np.cov(X.T)

def gaussianDistribution(X):
    # evaluate the multivariate Gaussian density at each row of X
    mu, Sigma = gaussianParameter(X)
    detSigma = np.linalg.det(Sigma)
    D = X - mu
    exponent = -.5*np.sum(D @ np.linalg.inv(Sigma) * D, axis=1)
    return np.exp(exponent)/np.sqrt((2*np.pi)**X.shape[1] * detSigma)
In [5]:
mu, Sigma = gaussianParameter(anomalyX)
print(anomalyX.shape)
print(mu.shape)
print(Sigma.shape)
plt.plot(anomalyX[:, 0], anomalyX[:, 1], 'o', markersize=3, alpha=.3)
XX, YY = np.mgrid[0:30:.5, 0:30:.5]
XY = np.empty(XX.shape + (2,))
XY[:, :, 0] = XX
XY[:, :, 1] = YY
multivariateGaussian = multivariate_normal(mu, Sigma)
plt.contour(XX, YY, multivariateGaussian.pdf(XY), [10 ** a for a in range(-22,0,4)])
plt.axis('equal')
plt.show()
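As a sanity check, the hand-rolled density from gaussianDistribution should agree with scipy's implementation to machine precision (a minimal sketch using the objects defined above):
In [ ]:
# compare the manual pdf against scipy.stats.multivariate_normal
print(np.allclose(gaussianDistribution(anomalyX), multivariateGaussian.pdf(anomalyX)))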
In [6]:
# diagonal covariance: the features are modelled as independent
σ = np.diag(np.var(anomalyX, axis=0))
plt.plot(anomalyX[:, 0], anomalyX[:, 1], 'o', markersize=3, alpha=.3)
XX, YY = np.mgrid[0:30:.5, 0:30:.5]
XY = np.empty(XX.shape + (2,))
XY[:, :, 0] = XX
XY[:, :, 1] = YY
multivariateGaussian_1 = multivariate_normal(mu, σ)
plt.contour(XX, YY, multivariateGaussian_1.pdf(XY), [10**a for a in range(-22,0,4)])
plt.axis('equal')
plt.show()
The type I error rate (false positives) is denoted by $\alpha$.
The type II error rate (false negatives) is denoted by $\beta$.
Accuracy $= \frac{tP + tN}{tP + tN + fP + fN}$
Precision $= \frac{tP}{tP + fP}$
Recall $= \frac{tP}{tP + fN}$
$F_1 = \frac{2\cdot Precision\cdot Recall}{Precision + Recall}$
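Before applying these to the validation set below, here is a minimal sketch of the computation on made-up counts (the numbers are illustrative only, not from the data):
In [ ]:
tP, fP, fN = 7, 3, 2  # illustrative confusion counts
precision = tP/(tP + fP)
recall = tP/(tP + fN)
F1 = 2*precision*recall/(precision + recall)
print(precision, recall, F1)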
In [7]:
Xval = anomalyData['Xval']
yval = np.array(anomalyData['yval'], dtype=bool).flatten()
plt.plot(Xval[:, 0], Xval[:, 1], 'o', markersize=3, alpha=.3)
plt.show()
multivariateGaussian = multivariate_normal(mu, Sigma)
pX = multivariateGaussian.pdf(anomalyX)
pXmin, pXmax = np.min(pX), np.max(pX)
pXval = multivariateGaussian.pdf(Xval)
# sweep the threshold ε between the extreme densities and keep the best F1
nSteps = 1000
bestF, bestEpsilon = 0, 0
for epsilon in np.linspace(pXmin, pXmax, nSteps):
    anomalyPrediction = pXval <= epsilon
    tP = np.sum(yval & anomalyPrediction)
    fP = np.sum(~yval & anomalyPrediction)
    fN = np.sum(yval & ~anomalyPrediction)
    if tP == 0:
        continue
    precision = tP/(tP + fP)
    recall = tP/(tP + fN)
    F_score = 2*precision*recall/(precision + recall)
    if F_score > bestF:
        bestF, bestEpsilon = F_score, epsilon
print('best F1 %0.3f at ε = %0.2e' % (bestF, bestEpsilon))
In [8]:
moviefile_path = '../course_materials/ex8_movies.mat'
movieData = loadmat(moviefile_path)
print(movieData.keys())
print(movieData['Y'].shape)
print(movieData['R'].shape)
movieRating is the movie-rating matrix of shape (1682, 943): the number of movies by the number of users.<br> movieRated is the matrix of rated movies, also (1682, 943); movieRated(n,k) = 1 if user k gave a rating to movie n.<br> NB Strictly speaking, movieRated can be produced from movieRating using the simple operation movieRating != 0.
In [9]:
movieRating = movieData['Y']
movieRated = movieData['R']
n_movies, n_users = movieRating.shape
print(np.array_equal(movieRated, movieRating != 0))
In [10]:
# what the rating matrix would look like if every rated entry were a 5
(np.ones(movieRating.shape)*5)*(movieRating != 0)
Out[10]:
In [11]:
print("'Toy Story' average rating", np.mean(movieRating[0, movieRating[0] != 0]))
fig = plt.figure(figsize=(12, 8))
plt.imshow(movieRating)
plt.colorbar()
plt.ylabel("Movies", fontsize=15)
plt.xlabel("Users", fontsize=15)
Out[11]:
In [12]:
movieParametersfile_path = '../course_materials/ex8_movieParams.mat'
movieParameters = loadmat(movieParametersfile_path)
print(movieParameters.keys())
X = movieParameters['X']
β = movieParameters['Theta']
n_movies = int(movieParameters['num_movies'])
n_users = int(movieParameters['num_users'])
nFeatures = int(movieParameters['num_features'])
print(n_movies, n_users, nFeatures)
print(X.shape)
print(β.shape)
In [13]:
def flatten_βX(β, X):
    return np.concatenate((β.flatten(), X.flatten()))

def reshape_βX(βX, n_movies, n_users):
    splitβX = np.split(βX, [int(n_users*βX.size/(n_movies+n_users))])
    return (splitβX[0].reshape(n_users, -1), splitβX[1].reshape(n_movies, -1))

def get_hypothesis(β, X, Y):
    # for X(n,j), β(k,j), Y(n,k): hypothesis matrix h(n,k) = X(n,j)·β.T(j,k)
    # entries for unrated movies are zeroed out to avoid biasing the cost
    return np.multiply(np.dot(X, β.T), Y != 0)

def cost_function(βX, Y, n_movies, n_users, λ=.0):
    β, X = reshape_βX(βX, n_movies, n_users)
    H = get_hypothesis(β, X, Y)
    # unregularised cost (scalar)
    J = np.sum(np.square(H-Y))/2
    # regularisation term
    R = λ*(np.sum(np.square(β)) + np.sum(np.square(X)))/2
    return J + R
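A quick sanity check that the flatten/reshape pair is a true round trip (a sketch using the matrices loaded above):
In [ ]:
# flatten_βX followed by reshape_βX should recover β and X exactly
β2, X2 = reshape_βX(flatten_βX(β, X), n_movies, n_users)
print(np.array_equal(β2, β) and np.array_equal(X2, X))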
In [14]:
testUsers = 4; testMovies = 5; testFeatures = 3
testX = X[:testMovies,:testFeatures]
testβ = β[:testUsers,:testFeatures]
testY = movieRating[:testMovies,:testUsers]
testβX = flatten_βX(testβ, testX)
print(cost_function(testβX, testY, testMovies, testUsers))
print(cost_function(testβX, testY, testMovies, testUsers, λ=1.5))
In [15]:
def get_gradient(βX, Y, n_movies, n_users, λ=.0):
    β, X = reshape_βX(βX, n_movies, n_users)
    # hypothesis matrix h(n,k) = X(n,j)·β.T(j,k)
    H = get_hypothesis(β, X, Y)
    gβ = np.dot((H-Y).T, X)
    gX = np.dot(H-Y, β)
    # regularisation terms
    gβ += λ*β
    gX += λ*X
    return flatten_βX(gβ, gX)
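For reference, the regularised gradients implemented above are $\frac{\partial J}{\partial \beta} = (H - Y)^T X + \lambda\beta$ and $\frac{\partial J}{\partial X} = (H - Y)\beta + \lambda X$, where $H$ is the hypothesis matrix with unrated entries zeroed out.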
In [16]:
def testGradient(βX, Y, n_movies, n_users, λ=.0):
    print('Numerical Gradient \t Gradient \t\t Difference')
    Epsilon = 0.0001
    iGradient = get_gradient(βX, Y, n_movies, n_users, λ=λ)
    for i in np.random.choice(βX.size, 5, replace=False):
        # create an epsilon perturbation vector
        epsilon = np.zeros(βX.size)
        epsilon[i] = Epsilon
        negDelta = cost_function(βX-epsilon, Y, n_movies, n_users, λ=λ)
        posDelta = cost_function(βX+epsilon, Y, n_movies, n_users, λ=λ)
        newGradient = (posDelta-negDelta)/(2*Epsilon)
        print('%0.15f \t %0.15f \t %0.15f' % (newGradient, iGradient[i], newGradient-iGradient[i]))
In [17]:
Y = movieRating
βX = flatten_βX(β, X)
testGradient(βX, Y, n_movies, n_users)
In [18]:
movieList = {}
with open('../course_materials/movie_ids.txt', 'r') as movieFile:
    for line in movieFile:
        movieID, movieName = line.strip('\n').split(' ', 1)
        movieList[int(movieID)-1] = movieName
testRating = np.zeros((Y.shape[0],1))
testRating[0] = 4
testRating[97] = 2
testRating[6] = 3
testRating[11] = 5
testRating[53] = 4
testRating[63] = 5
testRating[65] = 3
testRating[68] = 5
testRating[182] = 4
testRating[225] = 5
testRating[354] = 5
testRating.shape
Out[18]:
In [19]:
# Add testRating to the movieRating (Y)
Y = np.hstack((Y, testRating))
n_movies, n_users = Y.shape
print(n_movies, n_users)
In [20]:
# preview of mean-normalisation: subtract each movie's mean (computed over rated entries only)
(Y - (Y.sum(1)/(Y != 0).sum(1)).reshape(-1, 1))*(Y != 0)
Out[20]:
In [21]:
def normaliseY(Y):
    # means are computed over rated entries only, to avoid bias from zeros;
    # unrated entries are kept at zero so the Y != 0 mask still identifies them
    rated = Y != 0
    meanY = Y.sum(1)/rated.sum(1)
    return (Y-meanY.reshape(-1, 1))*rated, meanY.reshape(-1, 1)
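To see the zero-handling at work, a tiny sketch with a made-up ratings matrix (the values are illustrative only):
In [ ]:
toyY = np.array([[5., 0., 3.],
                 [0., 0., 4.]])
toyNorm, toyMean = normaliseY(toyY)
print(toyMean.flatten())  # [4. 4.] — zeros are excluded from the means
print(toyNorm)            # unrated entries stay at zero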
In [22]:
normalisedY, meanY = normaliseY(Y)
print(Y.shape)
print(meanY.shape)
In [23]:
# Generate random initial parameters, β and X
nFeatures = 10
X = np.random.rand(n_movies,nFeatures)
β = np.random.rand(n_users,nFeatures)
βX = flatten_βX(β, X)
# λ is set to 10 by Andrew Ng
λ = 10.
# optimise the cost function on the mean-normalised ratings
result = sp.optimize.fmin_cg(cost_function, x0=βX, fprime=get_gradient,
                             args=(normalisedY, n_movies, n_users, λ),
                             maxiter=50, disp=True, full_output=True)
In [24]:
# Reshape the trained output into sensible "X" and "β" matrices
optβ, optX = reshape_βX(result[0], n_movies, n_users)
print(optX.shape)
print(optβ.shape)
In [25]:
# After training the model, now make recommendations by computing
# the predictions matrix
predictionRating = np.dot(optX, optβ.T)
print(predictionRating[:,-1].shape)
In [26]:
# Grab the last user's predictions (since I put my predictions at the
# end of the Y matrix, not the front)
# Add back in the mean movie ratings
testPrediction = predictionRating[:,-1] + meanY.flatten()
# print(testPrediction)
In [27]:
# Sort my predictions from highest to lowest
predictionIndicesSorted = np.argsort(testPrediction)[::-1]
print ("Top recommendations for you:")
for i in range(10):
    print('Predicting rating %0.1f for movie %s.' %
          (testPrediction[predictionIndicesSorted[i]], movieList[predictionIndicesSorted[i]]))
print("\nOriginal ratings provided:")
for i in range(len(testRating)):
    if testRating[i] > 0:
        print('Rated %d for movie %s.' % (testRating[i], movieList[i]))
In [ ]: