In [1]:
from __future__ import print_function
from time import time
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer
from sklearn.decomposition import NMF, LatentDirichletAllocation
from sklearn.datasets import fetch_20newsgroups
import numpy as np
np.set_printoptions(suppress=True)
In [2]:
# Toy ratings data: 7 users (rows) x 5 movies (columns).
# Column order matches the `movies` list, which is also used later as the
# feature-name list for topic printing.
movies = ["Matrix", "Alien", "Serenity", "Casablanca", "Amelie"]

moviesxuser = np.array([
    [2, 4, 4, 0, 0],
    [0, 5, 5, 0, 0],
    [1, 2, 2, 0, 0],
    [1, 0, 0, 1, 1],
    [3, 0, 0, 3, 3],
    [4, 0, 0, 4, 4],
    [5, 0, 0, 5, 5],
])

print(movies)
print(moviesxuser)

# Number of latent components (topics) to factor the matrix into.
n_components = 3
In [3]:
# Fit the NMF model
# Fit the NMF model (Frobenius-norm objective) and time it.
t0 = time()
# NOTE: the old `alpha` parameter was deprecated in scikit-learn 1.0 and
# removed in 1.2 (this cell raises TypeError on modern sklearn).
# `alpha_W` with the default `alpha_H='same'` regularizes both factors,
# the modern equivalent of `alpha=0.1`. (Exact numbers can differ
# slightly: since 1.2 the penalty is scaled by n_features / n_samples.)
nmf = NMF(n_components=n_components, random_state=1, alpha_W=0.1, l1_ratio=0.5)
W = nmf.fit_transform(moviesxuser)  # W: (n_users, n_components) user-topic weights
H = nmf.components_                 # H: (n_components, n_movies) topic-movie weights
print(W)
print("done in %0.3fs." % (time() - t0))
In [4]:
# Frobenius-norm distance between the original matrix and the low-rank
# reconstruction W @ H, stored on the fitted estimator (lower is better).
nmf.reconstruction_err_
Out[4]:
In [24]:
def print_top_words(model, feature_names, n_top_words):
    """Print, for each topic in `model.components_`, the names of the
    up-to-`n_top_words` highest-weighted features (zero-weight features
    are skipped)."""
    for idx, weights in enumerate(model.components_):
        # Indices of the largest weights, in descending order.
        top = weights.argsort()[::-1][:n_top_words]
        names = [feature_names[i] for i in top if weights[i] > 0]
        print("Topic #%d: " % idx + " ".join(names))
    print()
In [25]:
# Show the raw topic-by-movie weight matrix, then a readable summary
# of the strongest movies per topic.
n_top_words = 5  # at most five movie names per topic
print('\nTopics in NMF model (Frobenius norm):')
print(H)
print_top_words(nmf, movies, n_top_words)
In [7]:
# The factor H printed straight from the fitted estimator
# (same array that was assigned to `H` above).
print(nmf.components_)
In [8]:
# Project a new user into topic space: rated only "Matrix" (column 0) with 3.
new_ratings = np.array([[3, 0, 0, 0, 0]])
W_new = nmf.transform(new_ratings)
print(W_new)
In [9]:
# New user who rated only "Amelie" (last column) with 5.
new_ratings = np.array([[0, 0, 0, 0, 5]])
W_new = nmf.transform(new_ratings)
print(W_new)
In [10]:
# New user with ratings for both "Matrix" and "Casablanca".
new_ratings = np.array([[1, 0, 0, 1, 0]])
W_new = nmf.transform(new_ratings)
print(W_new)
In [11]:
# Transform a batch of three new rating vectors at once.
batch = np.array([
    [3, 0, 0, 0, 0],
    [0, 0, 0, 0, 5],
    [1, 0, 0, 3, 0],
])
W_new = nmf.transform(batch)
print(W_new)
In [12]:
# Compare the rounded low-rank reconstruction W @ H against the
# original ratings matrix.
reconstruction = W @ H
print(np.around(reconstruction))
print(moviesxuser)
In [ ]: