In [4]:
!pip install -U spacy --user
In [ ]:
Make sure to run this first to download the English model (the 'en' shortcut is the spaCy 2.x name; newer releases use en_core_web_sm):
python -m spacy download en
In [1]:
import nltk
nltk.download('vader_lexicon')
In [3]:
from __future__ import unicode_literals
import spacy
from spacy.tokens import Doc
from nltk.sentiment.vader import SentimentIntensityAnalyzer
sentiment_analyzer = SentimentIntensityAnalyzer()
def polarity_scores(doc):
    return sentiment_analyzer.polarity_scores(doc.text)

# register the extension so the scores are available as doc._.polarity_scores
Doc.set_extension('polarity_scores', getter=polarity_scores)

nlp = spacy.load('en')
doc = nlp("I'm so happy that finally all of us are together again")
print(doc._.polarity_scores)
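The getter returns VADER's full score dict ('neg', 'neu', 'pos', 'compound'). A minimal sketch, assuming the extension above is registered, of applying it to several texts at once with nlp.pipe (the example sentences are made up):
In [ ]:
# stream a few texts through the pipeline and read the custom attribute
texts = ["I love this!", "This is terrible.", "The meeting is at 3 p.m."]
for doc in nlp.pipe(texts):
    print(doc.text, '->', doc._.polarity_scores['compound'])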
In [4]:
doc = nlp("I just bought 2 shares at 9 a.m. because the stock went up 30% in just 2 days according to the WSJ")
for ent in doc.ents:
    print(ent.label_, ent.text)
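If you only need certain entity types, you can filter on ent.label_. A small sketch reusing the doc above; CARDINAL, PERCENT and DATE are standard labels in the English models:
In [ ]:
# keep only the numeric and date entities from the sentence above
wanted = {'CARDINAL', 'PERCENT', 'DATE'}
print([(ent.label_, ent.text) for ent in doc.ents if ent.label_ in wanted])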
In [5]:
doc = nlp("Next week I'll be in Madrid.")
print([(token.text, token.tag_) for token in doc])
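The fine-grained tags (VB, NNP, ...) can be cryptic; spacy.explain turns each tag into a readable description. A quick sketch using the same doc:
In [ ]:
# decode each fine-grained tag into a human-readable description
for token in doc:
    print(token.text, token.tag_, spacy.explain(token.tag_))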
In [6]:
target = nlp("Cats are beautiful animals.")
doc1 = nlp("Dogs are awesome.")
doc2 = nlp("Some gorgeous creatures are felines.")
doc3 = nlp("Dolphins are swimming mammals.")
doc4 = nlp("I'm not happy with molecular physics and other such business")
print(target.similarity(doc1)) # 0.8901765218466683
print(target.similarity(doc2)) # 0.9115828449161616
print(target.similarity(doc3)) # 0.7822956752876101
print(target.similarity(doc4)) # unrelated text, so expect the lowest score of the four
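A small sketch, reusing the docs above, that ranks the candidates by similarity to the target instead of printing raw scores:
In [ ]:
# sort the candidate docs from most to least similar to the target
candidates = [doc1, doc2, doc3, doc4]
for cand in sorted(candidates, key=target.similarity, reverse=True):
    print(round(target.similarity(cand), 3), cand.text)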
In [7]:
from spacy.lang.en import English
from spacy.matcher import Matcher
nlp = English() # we only want the tokenizer, so no need to load a model
matcher = Matcher(nlp.vocab)
pos_emoji = [u'😀', u'😃', u'😂', u'🤣', u'😊', u'😍'] # positive emoji
neg_emoji = [u'😞', u'😠', u'😩', u'😢', u'😭', u'😒'] # negative emoji
# add patterns to match one or more emoji tokens
pos_patterns = [[{'ORTH': emoji}] for emoji in pos_emoji]
neg_patterns = [[{'ORTH': emoji}] for emoji in neg_emoji]
# function to label the sentiment
def label_sentiment(matcher, doc, i, matches):
    match_id, start, end = matches[i]
    if doc.vocab.strings[match_id] == 'HAPPY':  # don't forget to get the string!
        doc.sentiment += 0.1  # add 0.1 for positive sentiment
    elif doc.vocab.strings[match_id] == 'SAD':
        doc.sentiment -= 0.1  # subtract 0.1 for negative sentiment

matcher.add('HAPPY', label_sentiment, *pos_patterns)  # add positive patterns
matcher.add('SAD', label_sentiment, *neg_patterns)  # add negative patterns
# add pattern for valid hashtag, i.e. '#' plus any ASCII token
matcher.add('HASHTAG', None, [{'ORTH': '#'}, {'IS_ASCII': True}])
doc = nlp(u"Hello world 😀 #MondayMotivation")
matches = matcher(doc)
for match_id, start, end in matches:
    string_id = doc.vocab.strings[match_id]  # look up string ID
    span = doc[start:end]
    print(string_id, span.text)
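The on_match callbacks also update doc.sentiment, so after matching you can read off the accumulated score; with one positive emoji in the text above it should come out as 0.1 (assuming the cells above ran as-is):
In [ ]:
# one HAPPY match fired, so the sentiment accumulated by the callback is 0.1
print(doc.sentiment)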
In [ ]: