In [1]:
from nltk.tokenize import TreebankWordTokenizer
sentence = "How does nltk tokenize this sentence?"
tokenizer = TreebankWordTokenizer()
tokenizer.tokenize(sentence)
Out[1]:
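The Treebank tokenizer also splits standard English contractions and peels punctuation off the ends of words. A quick sketch with a made-up sentence (not part of the original notebook):

tokenizer.tokenize("Don't you dare!")
# -> ['Do', "n't", 'you', 'dare', '!'] : the contraction is split and the '!' becomes its own token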
In [5]:
from nltk.tokenize.casual import casual_tokenize
tweet = "OMG @twitterguy that was sooooooooo cool :D :D :D!!!!"
print(casual_tokenize(tweet))
In [4]:
# reduce_len=True collapses long character repetitions (e.g. "sooooooooo");
# strip_handles=True drops Twitter @handles from the token list
casual_tokenize(tweet, reduce_len=True, strip_handles=True)
Out[4]:
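casual_tokenize is a thin wrapper around NLTK's TweetTokenizer class; the sketch below (my own, not from the original notebook) passes the same options to the class directly:

from nltk.tokenize import TweetTokenizer
tweet_tokenizer = TweetTokenizer(reduce_len=True, strip_handles=True)
tweet_tokenizer.tokenize(tweet)  # same tokens as the casual_tokenize call above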
In [16]:
from nltk.util import ngrams
list(ngrams(sentence.split(), 2))  # pairs of adjacent whitespace-split tokens (bigrams)
Out[16]:
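The same helper works for any n. A small sketch that reuses the Treebank tokens from above and joins each n-gram back into a single string, a common preprocessing step:

tokens = tokenizer.tokenize(sentence)
trigrams = [" ".join(gram) for gram in ngrams(tokens, 3)]
trigrams[:3]  # first three 3-grams of the example sentence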
In [20]:
import nltk
nltk.download("stopwords")
stop_words = nltk.corpus.stopwords.words("english")
stop_words[:10]
Out[20]:
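Stop words are typically used to filter a token list. A minimal sketch, assuming a simple lowercase comparison is acceptable:

tokens = casual_tokenize(tweet)
[token for token in tokens if token.lower() not in stop_words]
# drops common function words such as 'that' and 'was'; the emoticons survive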
In [38]:
nltk.download('vader_lexicon')  # fetch VADER's sentiment lexicon (one-time download)
from nltk.sentiment.vader import SentimentIntensityAnalyzer
sia = SentimentIntensityAnalyzer()
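The analyzer's lexicon is a plain dict mapping tokens to valence scores, and it covers emoticons as well as words. A hedged peek (if ":D" or ":(" is missing from the lexicon, the result is simply an empty dict):

{token: score for token, score in sia.lexicon.items() if token in (":D", ":(")}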
In [39]:
negative_sentence = "This is the worst!!! I hate it so much :( :("
sia.polarity_scores(negative_sentence)
Out[39]:
In [40]:
sia.polarity_scores(tweet)
Out[40]:
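VADER returns neg/neu/pos proportions plus a normalized compound score in [-1, 1]. A common convention (not an NLTK default) is to threshold the compound score at ±0.05 to get a coarse label; a minimal sketch:

def label_sentiment(text, threshold=0.05):
    # a 0.05 threshold is a widely used convention, not something NLTK enforces
    compound = sia.polarity_scores(text)["compound"]
    if compound >= threshold:
        return "positive"
    if compound <= -threshold:
        return "negative"
    return "neutral"

label_sentiment(tweet), label_sentiment(negative_sentence)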