In [1]:
# Bag-of-words and TF-IDF text vectorization with scikit-learn, based on:
# https://machinelearningmastery.com/prepare-text-data-machine-learning-scikit-learn/
In [2]:
import warnings
warnings.filterwarnings('ignore')
In [3]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# NOTE(review): %pylab star-imports numpy and matplotlib.pylab into the kernel
# namespace and also activates the inline backend, which makes the line above
# redundant. No visible cell uses a pylab-provided name; consider replacing
# this with explicit imports if nothing else relies on it.
%pylab inline
In [18]:
# Toy corpus: three short football sentences, one document per list entry.
text = [
    "Ronaldo did the free kick, yes Ronaldo",
    "Messi did the penalty",
    "A striker did the penalty",
]
In [19]:
from sklearn.feature_extraction.text import CountVectorizer

# Build a bag-of-words vectorizer with default settings and learn its
# vocabulary from the corpus. fit() returns the estimator itself, so the
# chained call binds the fitted instance to `vectorizer`.
vectorizer = CountVectorizer().fit(text)

# Bare last expression: display the fitted estimator as the cell output.
vectorizer
Out[19]:
In [20]:
# Show the learned vocabulary: a mapping from each token to its column index
# in the matrices produced by this vectorizer.
vectorizer.vocabulary_
Out[20]:
In [21]:
# Encode every document in the corpus against the fitted vocabulary.
vector = vectorizer.transform(text)
In [22]:
# Dimensions of the encoded corpus: (number of documents, vocabulary size).
vector.shape
Out[22]:
In [23]:
# Inspect the container type returned by transform() (a SciPy sparse type,
# not a dense NumPy array).
type(vector)
Out[23]:
In [24]:
# Densify for display; fine here because the corpus is tiny.
vector.toarray()
Out[24]:
In [29]:
from sklearn.feature_extraction.text import TfidfVectorizer

# Switch to TF-IDF weighting. norm=None turns off the per-document
# normalisation, so the matrix holds the unnormalised tf * idf weights.
# Note: this rebinds `vectorizer`; cells that reference it and are re-run
# after this point will see the TF-IDF model, not the count model.
vectorizer = TfidfVectorizer(norm=None).fit(text)

# Bare last expression: display the fitted estimator as the cell output.
vectorizer
Out[29]:
In [30]:
# Re-encode the corpus with the vectorizer currently bound to `vectorizer`
# and rebind `vector` to the result (replacing any earlier value).
vector = vectorizer.transform(text)
# Densify for display; fine here because the corpus is tiny.
vector.toarray()
Out[30]:
In [0]: