In [1]:
from __future__ import print_function
from time import time
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.decomposition import NMF, LatentDirichletAllocation
from sklearn.datasets import fetch_20newsgroups
import numpy as np
np.set_printoptions(suppress=True)
In [2]:
# Toy ratings data: 7 users (rows) x 5 movies (columns).
# Column order matches the `movies` list, which is also used later as the
# feature-name list for topic printing.
movies = ["Matrix", "Alien", "Serenity", "Casablanca", "Amelie"]

moviesxuser = np.array([
    [2, 4, 4, 0, 0],
    [0, 5, 5, 0, 0],
    [1, 2, 2, 0, 0],
    [1, 0, 0, 1, 1],
    [3, 0, 0, 3, 3],
    [4, 0, 0, 4, 4],
    [5, 0, 0, 5, 5],
])

print(movies)
print(moviesxuser)

# Number of latent components (topics) to factor the matrix into.
n_components = 3
In [3]:
# Fit the NMF model
# Fit the NMF model (Frobenius-norm objective) and time it.
t0 = time()
# NOTE: the old `alpha` parameter was deprecated in scikit-learn 1.0 and
# removed in 1.2 (this cell raises TypeError on modern sklearn).
# `alpha_W` with the default `alpha_H='same'` regularizes both factors,
# the modern equivalent of `alpha=0.1`. (Exact numbers can differ
# slightly: since 1.2 the penalty is scaled by n_features / n_samples.)
nmf = NMF(n_components=n_components, random_state=1, alpha_W=0.1, l1_ratio=0.5)
W = nmf.fit_transform(moviesxuser)  # W: (n_users, n_components) user-topic weights
H = nmf.components_                 # H: (n_components, n_movies) topic-movie weights
print(W)
print("done in %0.3fs." % (time() - t0))
In [4]:
# Frobenius-norm distance between the original matrix and the low-rank
# reconstruction W @ H, stored on the fitted estimator (lower is better).
nmf.reconstruction_err_
Out[4]:
In [24]:
def print_top_words(model, feature_names, n_top_words):
    """Print, for each topic in `model.components_`, the names of the
    up-to-`n_top_words` highest-weighted features (zero-weight features
    are skipped)."""
    for idx, weights in enumerate(model.components_):
        # Indices of the largest weights, in descending order.
        top = weights.argsort()[::-1][:n_top_words]
        names = [feature_names[i] for i in top if weights[i] > 0]
        print("Topic #%d: " % idx + " ".join(names))
    print()
In [25]:
# Show the raw topic-by-movie weight matrix, then a readable summary
# of the strongest movies per topic.
n_top_words = 5  # at most five movie names per topic
print('\nTopics in NMF model (Frobenius norm):')
print(H)
print_top_words(nmf, movies, n_top_words)
In [7]:
# The factor H printed straight from the fitted estimator
# (same array that was assigned to `H` above).
print(nmf.components_)
In [8]:
# Project a new user into topic space: rated only "Matrix" (column 0) with 3.
new_ratings = np.array([[3, 0, 0, 0, 0]])
W_new = nmf.transform(new_ratings)
print(W_new)
In [9]:
# New user who rated only "Amelie" (last column) with 5.
new_ratings = np.array([[0, 0, 0, 0, 5]])
W_new = nmf.transform(new_ratings)
print(W_new)
In [10]:
# New user with ratings for both "Matrix" and "Casablanca".
new_ratings = np.array([[1, 0, 0, 1, 0]])
W_new = nmf.transform(new_ratings)
print(W_new)
In [11]:
# Transform a batch of three new rating vectors at once.
batch = np.array([
    [3, 0, 0, 0, 0],
    [0, 0, 0, 0, 5],
    [1, 0, 0, 3, 0],
])
W_new = nmf.transform(batch)
print(W_new)
In [12]:
# Compare the rounded low-rank reconstruction W @ H against the
# original ratings matrix.
reconstruction = W @ H
print(np.around(reconstruction))
print(moviesxuser)
In [ ]: