In [1]:
# Toy corpus: four short documents sharing overlapping vocabulary.
our_texts = [
    "The cat sat on the mat",
    "The cat saw the other cat on Sat while she sat",
    "Excellent Smithers! Use the saw on the cat in the magic trick with the dog",
    "Excellent magic saw Smithers on the dog",
]
In [2]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Fit a TF-IDF model on the corpus with English stop words removed.
# A float min_df is a document-frequency proportion threshold.
vectorizer = TfidfVectorizer(min_df=0.1, stop_words='english', use_idf=True)
document_term_matrix = vectorizer.fit_transform(our_texts)

# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# use get_feature_names_out() when available and fall back on older releases.
# .tolist() keeps `vocab` a plain list of str, matching what the old method returned.
if hasattr(vectorizer, "get_feature_names_out"):
    vocab = vectorizer.get_feature_names_out().tolist()
else:
    vocab = vectorizer.get_feature_names()
In [3]:
# Show the learned vocabulary; these terms label the matrix columns below.
vocab
Out[3]:
In [4]:
import pandas as pd
In [5]:
# Tabulate the TF-IDF weights densely: rows follow the order of the input
# documents, columns are labelled with the vocabulary terms.
pd.DataFrame(
    data=document_term_matrix.toarray(),
    columns=vocab,
)
Out[5]:
In [6]:
from sklearn.metrics.pairwise import cosine_similarity
In [7]:
# Pairwise cosine similarity between the rows (documents) of the TF-IDF matrix.
similarity = cosine_similarity(document_term_matrix)
In [8]:
# Render the similarity matrix as a table (default integer row/column labels).
pd.DataFrame(data=similarity)
Out[8]:
In [9]:
# Transposing makes each row a term, so this is term-to-term cosine similarity.
# NOTE(review): this rebinds `similarity`, replacing the document-level matrix
# computed in an earlier cell; re-running that earlier display cell afterwards
# will show this matrix instead.
similarity = cosine_similarity(document_term_matrix.T)
In [10]:
# Label both axes with the vocabulary so each cell reads as a term-term similarity.
pd.DataFrame(
    data=similarity,
    index=vocab,
    columns=vocab,
)
Out[10]:
In [ ]: