The objective of NMF is to find two non-negative matrices (W, H) whose product approximates the non-negative matrix X. This factorization can be used, for example, for dimensionality reduction, source separation, or topic extraction.
In [1]:
import numpy as np
from sklearn.decomposition import NMF
Defining the model
In [25]:
# Number of latent components for the factorization.
K = 4
# scikit-learn NMF model: approximates X ≈ W @ H with non-negative W and H.
model = NMF(n_components=K)
model
Out[25]:
A test Array
In [26]:
# Small non-negative test matrix (5 rows x 4 columns); zeros act as
# "missing" entries for the custom factorizer defined further below.
Original = np.array([
    [5, 3, 0, 1],
    [4, 0, 0, 1],
    [3, 2, 0, 0],
    [7, 0, 4, 1],
    [0, 2, 5, 0],
])
In [27]:
# Fit the NMF model: W holds the per-row component weights,
# H (model.components_) holds the per-column component loadings.
W = model.fit_transform(Original)
H = model.components_
In [28]:
# Show the learned factors, rounded to 2 decimals for readability.
print("W\n",np.round(W,2))
print("H\n",np.round(H,2))
Now let's see how close W×H is to X, and let's call the result crossValue.
In [6]:
# Reconstruct X from the sklearn factors and compare with the original matrix.
crossValue = np.dot(W,H)
print("crossValue \n",crossValue)
print("rounded Values\n",np.round(crossValue,2))
print("Original\n",Original)
In [17]:
import matplotlib.pyplot as plt


def plotCompare(Original, prediction):
    """Bar-plot the last column of ``Original`` next to the same column of
    ``prediction`` so the reconstruction quality can be eyeballed.

    Parameters
    ----------
    Original : ndarray of shape (N, M)
        The ground-truth matrix.
    prediction : ndarray of shape (N, M)
        The reconstructed matrix (e.g. ``W @ H``).
    """
    N = Original.shape[0]
    last = Original.shape[1] - 1   # index of the column being compared
    ind = np.arange(N)             # the x locations for the groups
    width = 0.17                   # the width of the bars

    fig, ax = plt.subplots()
    rects1 = ax.bar(ind, Original[:, last], width, color='r')
    rects2 = ax.bar(ind + width, prediction[:, last], width, color='b')

    # Labels, title and axis ticks.
    ax.set_ylabel('Last Value')
    ax.set_title('Row Values')
    # Center each tick under its pair of bars (the original divided by the
    # column index, which mis-placed the ticks) and generate one label per
    # row instead of a hard-coded 6-label tuple that broke for N != 6.
    ax.set_xticks(ind + width / 2)
    ax.set_xticklabels(['G%d' % (i + 1) for i in range(N)])
    ax.legend((rects1[0], rects2[0]), ('Original', 'Cross Value'))
    plt.show()
In [18]:
# Compare the last column of the original against the sklearn reconstruction.
plotCompare(Original,crossValue)
Modified by DavidGutierrez
R : the matrix to be factorized, of dimension N x M
K : the number of latent features
Steps : the maximum number of steps to perform the optimisation
Alpha : the learning rate
Beta : the regularization parameter
Returns: the final matrices W and H
In [9]:
def matrix_factorization(R, K=2, steps=5000, alpha=0.0002, beta=0.02,
                         error=0.001, seed=None):
    """Factorize ``R`` (N x M) into ``W`` (N x K) and ``H`` (K x M) by
    stochastic gradient descent on the squared error of the OBSERVED
    entries only (entries with ``R[i][j] > 0``); zeros are treated as
    missing values, so the result can "fill in" the blanks.

    Parameters
    ----------
    R : array-like of shape (N, M)
        The matrix to be factorized.
    K : int
        The number of latent features.
    steps : int
        The maximum number of optimisation steps.
    alpha : float
        The learning rate.
    beta : float
        The L2 regularization parameter.
    error : float
        Stop early once the regularized loss drops below this threshold.
    seed : int or None
        Optional RNG seed for a reproducible initialisation; ``None``
        keeps the original unseeded behavior.

    Returns
    -------
    (W, H) : tuple of ndarray
        The final factor matrices.
    """
    R = np.asarray(R)
    rng = np.random.default_rng(seed)
    N, M = len(R), len(R[0])
    W = rng.random((N, K))
    H = rng.random((K, M))
    for step in range(steps):
        # One SGD pass over every observed entry.
        for i in range(N):
            for j in range(M):
                if R[i][j] > 0:
                    eij = R[i][j] - np.dot(W[i, :], H[:, j])
                    for k in range(K):
                        # Note: H's update deliberately uses the freshly
                        # updated W[i][k], matching the reference algorithm.
                        W[i][k] = W[i][k] + alpha * (2 * eij * H[k][j] - beta * W[i][k])
                        H[k][j] = H[k][j] + alpha * (2 * eij * W[i][k] - beta * H[k][j])
        # Regularized squared loss over the observed entries.
        e = 0.0
        for i in range(N):
            for j in range(M):
                if R[i][j] > 0:
                    e = e + pow(R[i][j] - np.dot(W[i, :], H[:, j]), 2)
                    for k in range(K):
                        e = e + (beta / 2) * (pow(W[i][k], 2) + pow(H[k][j], 2))
        if e < error:
            break  # converged early
    return W, H
In [10]:
# Run the custom SGD factorizer on the same test matrix (overwrites the
# sklearn W and H from the earlier cells).
W, H = matrix_factorization(Original,K)
In [11]:
# Display the learned row-factor matrix.
W
Out[11]:
In [12]:
# Display the learned column-factor matrix.
H
Out[12]:
In [13]:
# Reconstruct the matrix from the custom factorizer's output.
prediction = np.dot(W,H)
print(prediction)
In [14]:
# Rounded reconstruction, for easier side-by-side comparison.
np.around(prediction,2)
Out[14]:
In [15]:
# Show the original matrix again for comparison with the reconstruction.
Original
Out[15]:
In [19]:
# Compare the last column of the original against the custom reconstruction.
plotCompare(Original,prediction)
In [ ]: