notebook.community

Edit and run



In [11]:

    
from nltk.corpus import twitter_samples
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import nltk.classify.util
from nltk.classify import NaiveBayesClassifier



In [2]:

    
# List the file ids bundled with the twitter_samples corpus.
twitter_samples.fileids()









    Out[2]:





['negative_tweets.json', 'positive_tweets.json', 'tweets.20150430-223406.json']



In [3]:

    
# Load the tweets from the negative file as strings and print the first five.
# `strings` and `string` stay at module level; a later cell reads `string`.
strings = twitter_samples.strings('negative_tweets.json')
for string in strings[:5]:
    print(string)









    



hopeless for tmr :(
Everything in the kids section of IKEA is so cute. Shame I'm nearly 19 in 2 months :(
@Hegelbon That heart sliding into the waste basket. :(
“@ketchBurning: I hate Japanese call him "bani" :( :(”

Me too
Dang starting next week I have "work" :(



In [6]:

    
def create_word_features(words, stop_words=None):
    """Turn a sequence of tokens into a bag-of-words feature dict.

    Every token that is not a stopword becomes a key mapped to ``True``;
    repeated tokens collapse into a single key. Matching against the
    stopword collection is case-sensitive and tokens are kept verbatim.

    Args:
        words: Iterable of string tokens (e.g. the output of ``word_tokenize``).
        stop_words: Optional collection of tokens to drop. When omitted,
            NLTK's English stopword list is used.

    Returns:
        dict mapping each kept token to ``True``.
    """
    if stop_words is None:
        # Build the set once per call (not once per token) so each membership
        # test below is a cheap set lookup.
        stop_words = set(stopwords.words("english"))
    return {word: True for word in words if word not in stop_words}


# Quick check: duplicate tokens collapse to one key each, and the output below shows 'the' is dropped.
create_word_features(['the', 'quick', 'brown', 'the', 'jumps' , 'quick'])









    Out[6]:





{'brown': True, 'jumps': True, 'quick': True}



In [7]:

    
neg_reviews = []

# Tokenize every negative tweet and pair its bag-of-words features with the
# 'negative' label, giving the (features, label) tuples the classifier trains on.
# The loop variable keeps the name `string` because a later cell prints `string`.
for string in twitter_samples.strings('negative_tweets.json'):
    words = word_tokenize(string)
    neg_reviews.append((create_word_features(words), "negative"))

print(neg_reviews[0])
print(len(neg_reviews))









    



({'(': True, 'hopeless': True, 'tmr': True, ':': True}, 'negative')
5000



In [8]:

    
pos_reviews = []

# Tokenize every positive tweet and pair its bag-of-words features with the
# 'positive' label, giving the (features, label) tuples the classifier trains on.
# The loop variable keeps the name `string` because a later cell prints `string`.
for string in twitter_samples.strings('positive_tweets.json'):
    words = word_tokenize(string)
    pos_reviews.append((create_word_features(words), "positive"))

print(pos_reviews[0])
print(len(pos_reviews))









    



({'top': True, 'PKuchly57': True, ':': True, '#': True, 'engaged': True, 'week': True, ')': True, 'FollowFriday': True, '@': True, 'France_Inte': True, 'Milipol_Paris': True, 'community': True, 'members': True}, 'positive')
5000



In [9]:

    
# Create the training and test set
# Use the first TRAIN_PER_CLASS tweets of each class for training and hold out
# the rest of each class for testing, so both sets stay balanced.
# NOTE(review): the split behind the accuracy recorded further down is not
# shown in this notebook; 4000 per class (80/20) is an assumption -- confirm.
TRAIN_PER_CLASS = 4000
train_set = neg_reviews[:TRAIN_PER_CLASS] + pos_reviews[:TRAIN_PER_CLASS]
test_set = neg_reviews[TRAIN_PER_CLASS:] + pos_reviews[TRAIN_PER_CLASS:]

print(len(train_set), len(test_set))



In [12]:

    
# Create a Naive Bayes classifier from the labelled (features, label) tuples
classifier = NaiveBayesClassifier.train(train_set)

# Find the accuracy on the held-out tweets; it is scaled by 100 to print a percentage
accuracy = nltk.classify.util.accuracy(classifier, test_set)
print(accuracy * 100)









    



97.39999999999999



In [17]:

    
# Show the tweet unchanged, then with colons removed, then with the
# parentheses removed as well.
without_colons = string.replace(":", "")
without_smiley = without_colons.replace(")", "").replace("(", "")
for variant in (string, without_colons, without_smiley):
    print(variant)









    



@DanielOConnel18 you could say he will have egg on his face :-)
@DanielOConnel18 you could say he will have egg on his face -)
@DanielOConnel18 you could say he will have egg on his face -



In [18]:

    
neg_reviews = []

# Repeat the negative extraction, this time deleting ':', ')' and '(' from each
# tweet before tokenizing so emoticon characters cannot become features.
# NOTE(review): presumably the emoticons give away the label -- confirm intent.
for string in twitter_samples.strings('negative_tweets.json'):
    cleaned = string.replace(":", "").replace(")", "").replace("(", "")
    words = word_tokenize(cleaned)
    neg_reviews.append((create_word_features(words), "negative"))

print(neg_reviews[0])
print(len(neg_reviews))









    



({'hopeless': True, 'tmr': True}, 'negative')
5000



In [19]:

    
pos_reviews = []

# Repeat the positive extraction, this time deleting ':', ')' and '(' from each
# tweet before tokenizing so emoticon characters cannot become features.
# NOTE(review): presumably the emoticons give away the label -- confirm intent.
for string in twitter_samples.strings('positive_tweets.json'):
    cleaned = string.replace(":", "").replace(")", "").replace("(", "")
    words = word_tokenize(cleaned)
    pos_reviews.append((create_word_features(words), "positive"))

print(pos_reviews[0])
print(len(pos_reviews))









    



({'top': True, 'FollowFriday': True, '@': True, 'community': True, 'France_Inte': True, '#': True, 'Milipol_Paris': True, 'engaged': True, 'week': True, 'PKuchly57': True, 'members': True}, 'positive')
5000



In [20]:

    
# Create train and test data again, from the current neg_reviews / pos_reviews.
# The names carry a "2" suffix (matching accuracy2) so re-running this cell
# never overwrites the first experiment's train_set / test_set / classifier.
# NOTE(review): 4000 training tweets per class is an assumed split -- confirm.
TRAIN_PER_CLASS_2 = 4000
train_set2 = neg_reviews[:TRAIN_PER_CLASS_2] + pos_reviews[:TRAIN_PER_CLASS_2]
test_set2 = neg_reviews[TRAIN_PER_CLASS_2:] + pos_reviews[TRAIN_PER_CLASS_2:]

# Create Classifier again
classifier2 = NaiveBayesClassifier.train(train_set2)

# Find the accuracy again; it is scaled by 100 to print a percentage
accuracy2 = nltk.classify.util.accuracy(classifier2, test_set2)
print(accuracy2 * 100)



In [ ]: