In [1]:
import spacy
from spacy import displacy
In [2]:
# Sample input for the entity-recognition comparison in the cells below.
# Adjacent string literals are concatenated by the parser; the only line
# break in the resulting string is the explicit "\n" after "Emotional ".
text = (
    "Cuba victim's family wants shooter's assets seized - CNN #SmartNews Emotional \n"
    "Jimmy Kimmel rips gun-control foes after Vegas shooting The Onion’s Las Vegas Shooting "
    "shooting https://t.co/aTPUnGvz9c mashable Controlled Chaos at Las Vegas Hospital Trauma Center "
    "shooting: Carla and Jae Unser hugged their children… "
    "YouTube changed its search algorithm after reports revealed it was surfacing inaccurate "
    "Stephen Paddock was 'upbeat, happy' as he bought guns"
)
1) Load two of spaCy's pretrained English language models: 'en_core_web_sm' and 'en_core_web_lg'
2) Create a doc from the text with each model
3) Compare the outputs of the two models, 'en_core_web_sm' and 'en_core_web_lg'
In [3]:
# Load the two pipelines that are compared in this notebook.
# Both model packages must already be installed in the kernel's environment.
nlp_sm = spacy.load("en_core_web_sm")  # "sm" package
nlp_lg = spacy.load("en_core_web_lg")  # "lg" package
In [4]:
# Run the same input text through both pipelines; each call produces its own doc.
doc_sm = nlp_sm(text)
doc_lg = nlp_lg(text)

# `doc_sm == doc_lg` does not compare annotations: the two docs are distinct
# objects, so that expression is False no matter what the models predicted.
# Compare the predicted entities (text, character span, label) explicitly instead.
ents_sm = [(ent.text, ent.start_char, ent.end_char, ent.label_) for ent in doc_sm.ents]
ents_lg = [(ent.text, ent.start_char, ent.end_char, ent.label_) for ent in doc_lg.ents]

print(ents_sm == ents_lg)  # True only if both models predicted exactly the same entities
print(doc_sm.text == doc_lg.text)  # Check if text is identical (should be True)
print(doc_sm.text)
In [5]:
# List every entity found by the 'en_core_web_sm' pipeline:
# entity text, start/end character offsets, and entity label.
for entity in doc_sm.ents:
    char_span = (entity.start_char, entity.end_char)
    print(entity.text, *char_span, entity.label_)
In [6]:
# List every entity found by the 'en_core_web_lg' pipeline:
# entity text, start/end character offsets, and entity label.
for entity in doc_lg.ents:
    char_span = (entity.start_char, entity.end_char)
    print(entity.text, *char_span, entity.label_)
In [7]:
# Visualise the entities predicted by 'en_core_web_sm' using displaCy's
# entity style, rendered inline in the notebook.
displacy.render(
    doc_sm,
    style="ent",
    jupyter=True,
)
Out[7]:
In [8]:
# Visualise the entities predicted by 'en_core_web_lg' using displaCy's
# entity style, rendered inline in the notebook.
displacy.render(
    doc_lg,
    style="ent",
    jupyter=True,
)
Out[8]:
In [ ]: