In [4]:
# Sample news sentence used as the input text for the NLP examples.
# The text is split over two source lines, so the value contains a single
# newline character right after the word "for".
sentence = (
    "European authorities fined Google a record $5.1 billion on Wednesday for\n"
    "abusing its power in the mobile phone market and ordered the company to alter its practices"
)
In [7]:
"""
Install spacy
$ pip install spacy
Download en_core_web_sm module
$ python -m spacy download en_core_web_sm
"""
import spacy
from spacy import displacy
from collections import Counter
import en_core_web_sm
# Load the en_core_web_sm pipeline and run it over the sample sentence.
nlp = en_core_web_sm.load()
doc = nlp(sentence)

# Cell output: one (entity text, entity label) pair per entry in doc.ents.
[(ent.text, ent.label_) for ent in doc.ents]
Out[7]:
Display dependency graph
In [8]:
# Render the dependency parse inline in the notebook (jupyter=True).
# Reuses the `doc` already produced by nlp(sentence) instead of running the
# pipeline a second time, and drops the no-op str() call (`sentence` is
# already a str). 'distance' sets the spacing between words in the drawing.
displacy.render(doc, style='dep', jupyter=True, options={'distance': 120})
In [9]:
# Cell output: (surface form, coarse POS tag, lemma) for every token of the
# sentence, leaving out stop words and tokens tagged as punctuation.
[
    (tok.orth_, tok.pos_, tok.lemma_)
    for tok in nlp(sentence)
    if not (tok.is_stop or tok.pos_ == 'PUNCT')
]
Out[9]:
Now let's see how to create a text classifier using NLTK and scikit-learn.