In [1]:
import spacy
from spacy import displacy

In [2]:
# Sample input that is run through both spaCy pipelines further down.
# Adjacent string literals inside the parentheses are concatenated into one string;
# the only line break in the result is the explicit "\n" after "Emotional ".
text = (
    "Cuba victim's family wants shooter's assets seized - CNN #SmartNews Emotional \n"
    "Jimmy Kimmel rips gun-control foes after Vegas shooting The Onion’s Las Vegas Shooting "
    "shooting https://t.co/aTPUnGvz9c mashable Controlled Chaos at Las Vegas Hospital Trauma Center "
    "shooting: Carla and Jae Unser hugged their children… "
    "YouTube changed its search algorithm after reports revealed it was surfacing inaccurate "
    "Stephen Paddock was 'upbeat, happy' as he bought guns"
)

1) Load two of spaCy's pretrained English models (small and large)

2) Create a doc from the sample text with each model

3) Compare the named entities found by 'en_core_web_sm' and 'en_core_web_lg'


In [3]:
# Load the small and the large English pipeline (in that order) so the same text
# can be processed by each of them.
nlp_sm, nlp_lg = (spacy.load(name) for name in ('en_core_web_sm', 'en_core_web_lg'))

In [4]:
# Run the same text through both pipelines so their annotations can be compared.
doc_sm = nlp_sm(text)
doc_lg = nlp_lg(text)

# spaCy's Doc does not define value equality, so `doc_sm == doc_lg` only tests
# object identity and is False for any two separate docs, whatever they contain.
# Compare the predicted entity spans (character offsets + label) instead.
ents_sm = [(ent.start_char, ent.end_char, ent.label_) for ent in doc_sm.ents]
ents_lg = [(ent.start_char, ent.end_char, ent.label_) for ent in doc_lg.ents]

print(ents_sm == ents_lg)               # Do both models predict the same entities? (False for this text)
print(doc_sm.text == doc_lg.text)       # Check if text is identical (should be True)
print(doc_sm.text)


False
True
Cuba victim's family wants shooter's assets seized - CNN #SmartNews Emotional 
Jimmy Kimmel rips gun-control foes after Vegas shooting The Onion’s Las Vegas Shooting shooting https://t.co/aTPUnGvz9c mashable Controlled Chaos at Las Vegas Hospital Trauma Center shooting: Carla and Jae Unser hugged their children… YouTube changed its search algorithm after reports revealed it was surfacing inaccurate Stephen Paddock was 'upbeat, happy' as he bought guns

In [5]:
# One line per entity found by the small model: text, start/end character offsets, label.
for span in doc_sm.ents:
    fields = (span.text, span.start_char, span.end_char, span.label_)
    print(*fields)


Cuba 0 4 GPE
CNN 53 56 ORG
SmartNews Emotional 
 58 79 ORG
Jimmy Kimmel 79 91 PERSON
Onion 139 144 ORG
Las Vegas 147 156 GPE
Las Vegas Hospital Trauma Center 228 260 ORG
Carla 271 276 PERSON
YouTube 314 321 ORG
Stephen Paddock 402 417 PERSON

In [6]:
# One line per entity found by the large model: text, start/end character offsets, label.
for span in doc_lg.ents:
    fields = (span.text, span.start_char, span.end_char, span.label_)
    print(*fields)


Cuba 0 4 GPE
CNN 53 56 ORG

Jimmy Kimmel 78 91 PERSON
Vegas 120 125 GPE
Onion 139 144 NORP
Las Vegas 147 156 GPE
Controlled Chaos 208 224 FAC
Las Vegas Hospital Trauma Center 228 260 FAC
Carla 271 276 PERSON
Jae Unser 281 290 PERSON
YouTube 314 321 PERSON
Stephen Paddock 402 417 PERSON

In [7]:
# Render the small model's entities as inline highlighted text in the notebook.
displacy.render(docs=doc_sm, style='ent', jupyter=True)


Out[7]:
Cuba GPE victim's family wants shooter's assets seized - CNN ORG # SmartNews Emotional ORG Jimmy Kimmel PERSON rips gun-control foes after Vegas shooting The Onion ORG ’s Las Vegas GPE Shooting shooting https://t.co/aTPUnGvz9c mashable Controlled Chaos at Las Vegas Hospital Trauma Center ORG shooting: Carla PERSON and Jae Unser hugged their children… YouTube ORG changed its search algorithm after reports revealed it was surfacing inaccurate Stephen Paddock PERSON was 'upbeat, happy' as he bought guns

In [8]:
# Render the large model's entities as inline highlighted text in the notebook.
displacy.render(docs=doc_lg, style='ent', jupyter=True)


Out[8]:
Cuba GPE victim's family wants shooter's assets seized - CNN ORG #SmartNews Emotional Jimmy Kimmel PERSON rips gun-control foes after Vegas GPE shooting The Onion NORP ’s Las Vegas GPE Shooting shooting https://t.co/aTPUnGvz9c mashable Controlled Chaos FAC at Las Vegas Hospital Trauma Center FAC shooting: Carla PERSON and Jae Unser PERSON hugged their children… YouTube PERSON changed its search algorithm after reports revealed it was surfacing inaccurate Stephen Paddock PERSON was 'upbeat, happy' as he bought guns

In [ ]: