In [3]:
import pandas as pd
import numpy as np
from sklearn.feature_extraction.text import CountVectorizer, TfidfVectorizer
In [4]:
# Toy corpus: three short documents used to demonstrate the vectorizers below.
corpus = [
    'This is first sentence',
    'Here is the second sentence',
    'Third sentence',
]
In [5]:
# Bag-of-words vectorizer with default settings.
count_vec = CountVectorizer()

# Fit the vocabulary on the corpus and build the document-term count matrix
# in one pass; the result is converted to a dense table in the next cell.
features = count_vec.fit_transform(raw_documents=corpus)
In [6]:
# Show the raw term counts as a table: one row per document in `corpus`,
# one column per vocabulary term learned by `count_vec`.
# - `get_feature_names_out()` replaces `get_feature_names()`, which was
#   deprecated in scikit-learn 1.0 and removed in 1.2.
# - `.toarray()` yields a plain ndarray rather than the legacy `np.matrix`
#   returned by `.todense()`; the resulting DataFrame is the same.
pd.DataFrame(features.toarray(), columns=count_vec.get_feature_names_out())
Out[6]:
In [7]:
# TF-IDF vectorizer with default settings.
tfidf = TfidfVectorizer()

# Fit on the same corpus and build the TF-IDF weighted document-term matrix;
# the result is converted to a dense table in the next cell.
features_tfidf = tfidf.fit_transform(raw_documents=corpus)
In [8]:
# Show the TF-IDF weights as a table: one row per document in `corpus`,
# one column per vocabulary term learned by `tfidf`.
# - `get_feature_names_out()` replaces `get_feature_names()`, which was
#   deprecated in scikit-learn 1.0 and removed in 1.2.
# - `.toarray()` yields a plain ndarray rather than the legacy `np.matrix`
#   returned by `.todense()`; the resulting DataFrame is the same.
pd.DataFrame(features_tfidf.toarray(), columns=tfidf.get_feature_names_out())
Out[8]: