In [8]:
from sklearn.feature_extraction.text import TfidfVectorizer
import pandas as pd
# Source of this example: http://stats.stackexchange.com/questions/73908/search-in-tf-idf
In [5]:
# Fit a TF-IDF model on a small toy corpus of phrases.
vectorizer = TfidfVectorizer(min_df=1)
my_phrases = [
    "boring answer phrase",
    "exciting phrase",
    "phrase on stackoverflow",
    "answer on stackoverflow",
]
# TF-IDF weights for the corpus: one row per phrase, one column per term.
my_features = vectorizer.fit_transform(my_phrases)
# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2.
# Prefer get_feature_names_out() and fall back only on older releases, so the
# cell runs on a fresh kernel regardless of the installed version.
if hasattr(vectorizer, "get_feature_names_out"):
    # The newer API returns an ndarray; convert so feature_words stays a list.
    feature_words = vectorizer.get_feature_names_out().tolist()
else:
    feature_words = vectorizer.get_feature_names()
In [6]:
# Map each document index to a Series of its TF-IDF weights, labelled by term.
D = dict()
for doc_index, weights in enumerate(my_features.toarray()):
    D[doc_index] = pd.Series(data=weights, index=feature_words)

# Built from the dict, the frame has one column per document and one row per
# term; transposing yields the more readable document-by-term layout.
df = pd.DataFrame(data=D)
df_t = df.T
In [7]:
# Display the document-by-term table: one row per phrase, one column per term.
df_t
Out[7]: