In [4]:
!pip install -U spacy --user
In [ ]:
Make sure to run this first to download the English model (the 'en' shortcut is the spaCy 2.x name; newer releases use en_core_web_sm):
python -m spacy download en
In [1]:
import nltk
nltk.download('vader_lexicon')
In [3]:
from __future__ import unicode_literals
import spacy
from spacy.tokens import Doc
from nltk.sentiment.vader import SentimentIntensityAnalyzer
sentiment_analyzer = SentimentIntensityAnalyzer()
def polarity_scores(doc):
    return sentiment_analyzer.polarity_scores(doc.text)

# register the extension so the scores are available as doc._.polarity_scores
Doc.set_extension('polarity_scores', getter=polarity_scores)

nlp = spacy.load('en')
doc = nlp("I'm so happy that finally all of us are together again")
print(doc._.polarity_scores)
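The getter returns VADER's full score dict ('neg', 'neu', 'pos', 'compound'). A minimal sketch, assuming the extension above is registered, of applying it to several texts at once with nlp.pipe (the example sentences are made up):
In [ ]:
# stream a few texts through the pipeline and read the custom attribute
texts = ["I love this!", "This is terrible.", "The meeting is at 3 p.m."]
for doc in nlp.pipe(texts):
    print(doc.text, '->', doc._.polarity_scores['compound'])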
In [4]:
doc = nlp("I just bought 2 shares at 9 a.m. because the stock went up 30% in just 2 days according to the WSJ")
for ent in doc.ents:
    print(ent.label_, ent.text)
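If you only need certain entity types, you can filter on ent.label_. A small sketch reusing the doc above; CARDINAL, PERCENT and DATE are standard labels in the English models:
In [ ]:
# keep only the numeric and date entities from the sentence above
wanted = {'CARDINAL', 'PERCENT', 'DATE'}
print([(ent.label_, ent.text) for ent in doc.ents if ent.label_ in wanted])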
In [5]:
doc = nlp("Next week I'll be in Madrid.")
print([(token.text, token.tag_) for token in doc])
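The fine-grained tags (VB, NNP, ...) can be cryptic; spacy.explain turns each tag into a readable description. A quick sketch using the same doc:
In [ ]:
# decode each fine-grained tag into a human-readable description
for token in doc:
    print(token.text, token.tag_, spacy.explain(token.tag_))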
In [6]:
target = nlp("Cats are beautiful animals.")
doc1 = nlp("Dogs are awesome.")
doc2 = nlp("Some gorgeous creatures are felines.")
doc3 = nlp("Dolphins are swimming mammals.")
doc4 = nlp("I'm not happy with molecular physics and other such business")
print(target.similarity(doc1)) # 0.8901765218466683
print(target.similarity(doc2)) # 0.9115828449161616
print(target.similarity(doc3)) # 0.7822956752876101
print(target.similarity(doc4)) # unrelated text, so expect the lowest score of the four
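A small sketch, reusing the docs above, that ranks the candidates by similarity to the target instead of printing raw scores:
In [ ]:
# sort the candidate docs from most to least similar to the target
candidates = [doc1, doc2, doc3, doc4]
for cand in sorted(candidates, key=target.similarity, reverse=True):
    print(round(target.similarity(cand), 3), cand.text)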
In [7]:
from spacy.lang.en import English
from spacy.matcher import Matcher
nlp = English() # we only want the tokenizer, so no need to load a model
matcher = Matcher(nlp.vocab)
pos_emoji = [u'😀', u'😃', u'😂', u'🤣', u'😊', u'😍'] # positive emoji
neg_emoji = [u'😞', u'😠', u'😩', u'😢', u'😭', u'😒'] # negative emoji
# add patterns to match one or more emoji tokens
pos_patterns = [[{'ORTH': emoji}] for emoji in pos_emoji]
neg_patterns = [[{'ORTH': emoji}] for emoji in neg_emoji]
# function to label the sentiment
def label_sentiment(matcher, doc, i, matches):
    match_id, start, end = matches[i]
    if doc.vocab.strings[match_id] == 'HAPPY':  # don't forget to get the string!
        doc.sentiment += 0.1  # add 0.1 for positive sentiment
    elif doc.vocab.strings[match_id] == 'SAD':
        doc.sentiment -= 0.1  # subtract 0.1 for negative sentiment

matcher.add('HAPPY', label_sentiment, *pos_patterns)  # add positive patterns
matcher.add('SAD', label_sentiment, *neg_patterns)  # add negative patterns
# add pattern for valid hashtag, i.e. '#' plus any ASCII token
matcher.add('HASHTAG', None, [{'ORTH': '#'}, {'IS_ASCII': True}])
doc = nlp(u"Hello world 😀 #MondayMotivation")
matches = matcher(doc)
for match_id, start, end in matches:
    string_id = doc.vocab.strings[match_id]  # look up string ID
    span = doc[start:end]
    print(string_id, span.text)
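The on_match callbacks also update doc.sentiment, so after matching you can read off the accumulated score; with one positive emoji in the text above it should come out as 0.1 (assuming the cells above ran as-is):
In [ ]:
# one HAPPY match fired, so the sentiment accumulated by the callback is 0.1
print(doc.sentiment)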
In [ ]: