In [11]:
from nltk.corpus import twitter_samples
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
import nltk.classify.util
from nltk.classify import NaiveBayesClassifier

In [2]:
# List the files that ship with the twitter_samples corpus.
twitter_samples.fileids()


Out[2]:
['negative_tweets.json', 'positive_tweets.json', 'tweets.20150430-223406.json']

In [3]:
# Load the raw text of every negative tweet and preview the first five.
strings = twitter_samples.strings('negative_tweets.json')
# NOTE: the loop variable `string` stays bound after the loop, and a later
# cell reads a variable of that name.
for string in strings[:5]:
    print(string)


hopeless for tmr :(
Everything in the kids section of IKEA is so cute. Shame I'm nearly 19 in 2 months :(
@Hegelbon That heart sliding into the waste basket. :(
“@ketchBurning: I hate Japanese call him "bani" :( :(”

Me too
Dang starting next week I have "work" :(

In [6]:
def create_word_features(words, stop_words=None):
    """Build a bag-of-words feature dict from a list of tokens.

    Every token that is not a stop word becomes a key mapped to ``True``;
    repeated tokens collapse into a single key.

    Args:
        words: Iterable of string tokens (e.g. the output of a tokenizer).
        stop_words: Optional iterable of tokens to drop. When omitted, the
            NLTK English stop-word list is used.

    Returns:
        dict mapping each remaining token to ``True``.
    """
    if stop_words is None:
        stop_words = stopwords.words("english")
    # Resolve the stop words once per call and use a set so each membership
    # test is O(1) rather than a scan of the whole list for every token.
    excluded = frozenset(stop_words)
    return {word: True for word in words if word not in excluded}


# Quick check: the stop word 'the' should be dropped and the repeated 'quick' kept once.
create_word_features(['the', 'quick', 'brown', 'the', 'jumps' , 'quick'])


Out[6]:
{'brown': True, 'jumps': True, 'quick': True}

In [7]:
neg_reviews = []

# Tokenize every negative tweet and pair its bag-of-words features with the
# 'negative' label, giving the (features, label) tuples used for training.
for string in twitter_samples.strings('negative_tweets.json'):
    words = word_tokenize(string)
    neg_reviews.append((create_word_features(words), 'negative'))

print(neg_reviews[0])
print(len(neg_reviews))


({'(': True, 'hopeless': True, 'tmr': True, ':': True}, 'negative')
5000

In [8]:
pos_reviews = []

# Tokenize every positive tweet and pair its bag-of-words features with the
# 'positive' label, giving the (features, label) tuples used for training.
for string in twitter_samples.strings('positive_tweets.json'):
    words = word_tokenize(string)
    pos_reviews.append((create_word_features(words), 'positive'))

print(pos_reviews[0])
print(len(pos_reviews))


({'top': True, 'PKuchly57': True, ':': True, '#': True, 'engaged': True, 'week': True, ')': True, 'FollowFriday': True, '@': True, 'France_Inte': True, 'Milipol_Paris': True, 'community': True, 'members': True}, 'positive')
5000

In [9]:
# Create the training and test set
# The first 4000 tweets of each class go to training and the rest of each
# class is held out for testing, so both sets stay balanced between labels.
TRAIN_PER_CLASS = 4000
train_set = neg_reviews[:TRAIN_PER_CLASS] + pos_reviews[:TRAIN_PER_CLASS]
test_set = neg_reviews[TRAIN_PER_CLASS:] + pos_reviews[TRAIN_PER_CLASS:]

print(len(train_set), len(test_set))


8000 2000

In [12]:
# Create a Naive bayes classifier
classifier = NaiveBayesClassifier.train(train_set)

# Find the accuracy
# accuracy() returns a fraction in [0, 1]; scale it to a percentage for display.
accuracy = nltk.classify.util.accuracy(classifier, test_set)
print(accuracy * 100)


97.39999999999999

In [17]:
# Show one tweet three ways: untouched, with colons removed, and with colons
# plus both parentheses removed.
# NOTE(review): `string` is not assigned in this cell; it holds whatever an
# earlier cell last bound to that name — confirm which one before relying on it.
print(
    string,
    string.replace(":", ""),
    string.replace(":", "").replace(")", "").replace("(", ""),
    sep="\n",
)


@DanielOConnel18 you could say he will have egg on his face :-)
@DanielOConnel18 you could say he will have egg on his face -)
@DanielOConnel18 you could say he will have egg on his face -

In [18]:
neg_reviews = []

# Repeat the above, this time replacing all smileys, like in example above
# Removing ':', ')' and '(' before tokenizing keeps emoticon characters out of
# the feature dicts.
for string in twitter_samples.strings('negative_tweets.json'):
    cleaned = string.replace(":", "").replace(")", "").replace("(", "")
    words = word_tokenize(cleaned)
    neg_reviews.append((create_word_features(words), 'negative'))

print(neg_reviews[0])
print(len(neg_reviews))


({'hopeless': True, 'tmr': True}, 'negative')
5000

In [19]:
pos_reviews = []

# Repeat the above, this time replacing all smileys, like in example above
# Removing ':', ')' and '(' before tokenizing keeps emoticon characters out of
# the feature dicts.
for string in twitter_samples.strings('positive_tweets.json'):
    cleaned = string.replace(":", "").replace(")", "").replace("(", "")
    words = word_tokenize(cleaned)
    pos_reviews.append((create_word_features(words), 'positive'))

print(pos_reviews[0])
print(len(pos_reviews))


({'top': True, 'FollowFriday': True, '@': True, 'community': True, 'France_Inte': True, '#': True, 'Milipol_Paris': True, 'engaged': True, 'week': True, 'PKuchly57': True, 'members': True}, 'positive')
5000

In [20]:
# Create train and test data again
# The first 4000 tweets of each class go to training and the rest of each
# class is held out for testing, so both sets stay balanced between labels.
TRAIN_PER_CLASS = 4000
train_set = neg_reviews[:TRAIN_PER_CLASS] + pos_reviews[:TRAIN_PER_CLASS]
test_set = neg_reviews[TRAIN_PER_CLASS:] + pos_reviews[TRAIN_PER_CLASS:]

# Create Classifier again
classifier2 = NaiveBayesClassifier.train(train_set)

# Find the accuracy again
# accuracy() returns a fraction in [0, 1]; scale it to a percentage for display.
accuracy2 = nltk.classify.util.accuracy(classifier2, test_set)
print(accuracy2 * 100)


75.8

In [ ]: