In [11]:
from nltk.corpus import twitter_samples
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import nltk.classify.util
from nltk.classify import NaiveBayesClassifier
In [2]:
twitter_samples.fileids()
Out[2]:
['negative_tweets.json', 'positive_tweets.json', 'tweets.20150430-223406.json']
In [3]:
strings = twitter_samples.strings('negative_tweets.json')
for string in strings[:5]:
    print(string)
In [6]:
def create_word_features(words):
# Write the code here
create_word_features(['the', 'quick', 'brown', 'the', 'jumps', 'quick'])
Out[6]:
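A minimal sketch of what create_word_features might look like, assuming the goal is a bag-of-words feature dictionary with English stopwords removed (which is what the stopwords import at the top suggests):

def create_word_features(words):
    # Drop English stopwords such as 'the'
    useful_words = [word for word in words if word not in stopwords.words("english")]
    # NLTK classifiers expect features as a dict of {feature_name: value}
    return dict((word, True) for word in useful_words)

With this definition, the call above would return {'quick': True, 'brown': True, 'jumps': True}: 'the' is filtered out as a stopword and duplicate words collapse into a single key.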
In [7]:
neg_reviews = []
# Write the code to extract the negative reviews here
print(neg_reviews[0])
print(len(neg_reviews))
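One possible way to fill in this cell, assuming each negative tweet becomes a (features, label) pair built with create_word_features:

neg_reviews = []
for tweet in twitter_samples.strings('negative_tweets.json'):
    # Tokenize the tweet and turn it into a labelled feature dict
    words = word_tokenize(tweet)
    neg_reviews.append((create_word_features(words), "negative"))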
In [8]:
pos_reviews = []
# Write the code to extract the positive reviews here
print(pos_reviews[0])
print(len(pos_reviews))
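The positive tweets can be handled the same way, again assuming (features, label) pairs:

pos_reviews = []
for tweet in twitter_samples.strings('positive_tweets.json'):
    words = word_tokenize(tweet)
    pos_reviews.append((create_word_features(words), "positive"))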
In [9]:
# Create the training and test sets
print(len(train_set), len(test_set))
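A simple split sketch, assuming the usual 5,000 tweets per class in twitter_samples, which gives a roughly 80/20 split:

# First 4,000 tweets of each class for training, the remaining 1,000 of each for testing
train_set = neg_reviews[:4000] + pos_reviews[:4000]
test_set = neg_reviews[4000:] + pos_reviews[4000:]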
In [12]:
# Create a Naive Bayes classifier
# Find the accuracy
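A sketch of the classification and evaluation step, using the NaiveBayesClassifier and accuracy helper imported above:

classifier = NaiveBayesClassifier.train(train_set)
accuracy = nltk.classify.util.accuracy(classifier, test_set)
print(accuracy * 100)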
In [17]:
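# Smileys such as ":(" and ":)" give away the label; strip the characters they are made of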
print(string)
print(string.replace(":", ""))
print(string.replace(":", "").replace(")", "").replace("(", ""))
In [18]:
neg_reviews = []
# Repeat the above, this time replacing all smileys, as in the example above
print(neg_reviews[0])
print(len(neg_reviews))
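A sketch for the negative tweets with the smiley characters stripped before tokenizing, as in the example above:

neg_reviews = []
for tweet in twitter_samples.strings('negative_tweets.json'):
    cleaned = tweet.replace(":", "").replace(")", "").replace("(", "")
    words = word_tokenize(cleaned)
    neg_reviews.append((create_word_features(words), "negative"))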
In [19]:
pos_reviews = []
# Repeat the above, this time replacing all smileys, as in the example above
print(pos_reviews[0])
print(len(pos_reviews))
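And the same for the positive tweets:

pos_reviews = []
for tweet in twitter_samples.strings('positive_tweets.json'):
    cleaned = tweet.replace(":", "").replace(")", "").replace("(", "")
    words = word_tokenize(cleaned)
    pos_reviews.append((create_word_features(words), "positive"))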
In [20]:
# Create the training and test sets again
# Create the classifier again
# Find the accuracy again
print(accuracy2 * 100)
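A sketch of the repeated split, training, and evaluation that would produce accuracy2:

train_set = neg_reviews[:4000] + pos_reviews[:4000]
test_set = neg_reviews[4000:] + pos_reviews[4000:]
classifier2 = NaiveBayesClassifier.train(train_set)
accuracy2 = nltk.classify.util.accuracy(classifier2, test_set)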