In [2]:
from nltk.corpus import twitter_samples, TwitterCorpusReader
import sys
sys.path.append("../bhtsa")
from NBClassifier import NBClassifier

# Corpus files holding the two labelled tweet sets. They are named explicitly
# rather than picked by position in fileids(), so the sentiment labels cannot
# be swapped silently if the listing order ever differs.
NEG_FILE = 'negative_tweets.json'
POS_FILE = 'positive_tweets.json'

fileIds = twitter_samples.fileids()
root = twitter_samples.root

# Fail early with a clear message instead of training on the wrong/empty data.
for required in (NEG_FILE, POS_FILE):
    if required not in fileIds:
        raise IOError('twitter_samples corpus is missing %r' % required)

# read tweet data from corpus: one reader per sentiment label, keeping only
# the 'text' field of every tweet document
negReader = TwitterCorpusReader(root, NEG_FILE)
posReader = TwitterCorpusReader(root, POS_FILE)
negTwt = [tweet['text'] for tweet in negReader.docs()]
posTwt = [tweet['text'] for tweet in posReader.docs()]

In [3]:
# Preview the first ten tweets of each class, negative first, each group
# introduced by its banner line.
for banner, tweets in (
        ('=========================negative=========================', negTwt),
        ('=========================positive=========================', posTwt)):
    print(banner)
    for text in tweets[:10]:
        print(text)


=========================negative=========================
hopeless for tmr :(
Everything in the kids section of IKEA is so cute. Shame I'm nearly 19 in 2 months :(
@Hegelbon That heart sliding into the waste basket. :(
“@ketchBurning: I hate Japanese call him "bani" :( :(”

Me too
Dang starting next week I have "work" :(
oh god, my babies' faces :( https://t.co/9fcwGvaki0
@RileyMcDonough make me smile :((
@f0ggstar @stuartthull work neighbour on motors. Asked why and he said hates the updates on search :( http://t.co/XvmTUikWln
why?:("@tahuodyy: sialan:( https://t.co/Hv1i0xcrL2"
Athabasca glacier was there in #1948 :-( #athabasca #glacier #jasper #jaspernationalpark #alberta #explorealberta #… http://t.co/dZZdqmf7Cz
=========================positive=========================
#FollowFriday @France_Inte @PKuchly57 @Milipol_Paris for being top engaged members in my community this week :)
@Lamb2ja Hey James! How odd :/ Please call our Contact Centre on 02392441234 and we will be able to assist you :) Many thanks!
@DespiteOfficial we had a listen last night :) As You Bleed is an amazing track. When are you in Scotland?!
@97sides CONGRATS :)
yeaaaah yippppy!!!  my accnt verified rqst has succeed got a blue tick mark on my fb profile :) in 15 days
@BhaktisBanter @PallaviRuhail This one is irresistible :)
#FlipkartFashionFriday http://t.co/EbZ0L2VENM
We don't like to keep our lovely customers waiting for long! We hope you enjoy! Happy Friday! - LWWF :) https://t.co/smyYriipxI
@Impatientraider On second thought, there’s just not enough time for a DD :) But new shorts entering system. Sheep must be buying.
Jgh , but we have to go to Bayan :D bye
As an act of mischievousness, am calling the ETL layer of our in-house warehousing app Katamari.

Well… as the name implies :p.

In [4]:
# Fit the naive Bayes classifier on the complete positive and negative
# tweet lists (no hold-out split is made here), reporting progress around
# the training call.
NBC = NBClassifier()
print('Training NBClassifier...')
NBC.train(posTwt, negTwt)
print('Done!')


Training NBClassifier...
Done!

In [5]:
# informative_features() writes the "Most Informative Features" table to
# stdout itself and returns None, so printing its return value only appends
# a stray "None" line after the table. Call it directly instead.
NBC.informative_features()


Most Informative Features
            contains(ff) = True           positi : neg_tw =     31.7 : 1.0
           contains(bam) = True           positi : neg_tw =     29.7 : 1.0
           contains(sad) = True           neg_tw : positi =     24.8 : 1.0
     contains(community) = True           positi : neg_tw =     21.0 : 1.0
       contains(arrived) = True           positi : neg_tw =     18.7 : 1.0
          contains(glad) = True           positi : neg_tw =     17.0 : 1.0
     contains(goodnight) = True           positi : neg_tw =     16.3 : 1.0
           contains(ugh) = True           neg_tw : positi =     15.7 : 1.0
        contains(justin) = True           neg_tw : positi =     15.4 : 1.0
           contains(noo) = True           neg_tw : positi =     15.0 : 1.0
None

In [ ]: