In [8]:
from sklearn.feature_extraction.text import TfidfVectorizer
import pandas as pd

# taken from http://stats.stackexchange.com/questions/73908/search-in-tf-idf

In [5]:
# Build a TF-IDF model over a tiny toy corpus. min_df=1 keeps every term that
# appears in at least one phrase, so nothing is pruned from the vocabulary.
vectorizer = TfidfVectorizer(min_df=1)

my_phrases = ["boring answer phrase",
              "exciting phrase",
              "phrase on stackoverflow",
              "answer on stackoverflow"]

# Learn the vocabulary and IDF weights, then vectorize the phrases in one pass.
my_features = vectorizer.fit_transform(my_phrases)

# get_feature_names() was deprecated in scikit-learn 1.0 and removed in 1.2;
# use get_feature_names_out() when available and fall back only on older
# releases. The result is kept as a plain list, matching the old API's return
# type, so downstream cells are unaffected.
if hasattr(vectorizer, "get_feature_names_out"):
    feature_words = list(vectorizer.get_feature_names_out())
else:
    feature_words = vectorizer.get_feature_names()

In [6]:
# Densify the TF-IDF matrix with the documented .toarray() call; the `.A`
# shorthand is deprecated/removed for SciPy sparse arrays.
dense_weights = my_features.toarray()

# Map each document index to a Series of its term weights, labelled by word.
D = {
    doc_index: pd.Series(weights, index=feature_words)
    for doc_index, weights in enumerate(dense_weights)
}

# Columns of `df` are documents and rows are words ...
df = pd.DataFrame(D)

# ... so transpose to get one row per document and one column per word.
df_t = df.transpose()

In [7]:
# Show the TF-IDF weight table: one row per phrase, one column per vocabulary word.
df_t


Out[7]:
answer boring exciting on phrase stackoverflow
0 0.553492 0.702035 0.000000 0.000000 0.448100 0.000000
1 0.000000 0.000000 0.842926 0.000000 0.538029 0.000000
2 0.000000 0.000000 0.000000 0.613667 0.496816 0.613667
3 0.577350 0.000000 0.000000 0.577350 0.000000 0.577350