In [16]:
'''
NLTK cookbook Naive Bayes Classifier.
'''
__author__ = 'Xia Wang'
In [17]:
from nltk.classify import util
from nltk.classify import NaiveBayesClassifier
from nltk.corpus import movie_reviews
In [18]:
# Simplified feature extraction method
def word_feats(words):
    """Build a presence-only bag-of-words feature dict.

    Args:
        words: iterable of hashable tokens (this notebook passes the word
            sequence of a single movie review).

    Returns:
        dict mapping every distinct token in ``words`` to ``True``.
        An empty iterable yields an empty dict.
    """
    # Only presence matters, so repeated tokens collapse onto one key.
    return dict.fromkeys(words, True)
In [19]:
# File ids of the corpus documents filed under each sentiment category.
negids, posids = movie_reviews.fileids('neg'), movie_reviews.fileids('pos')
In [24]:
# Pair each review's word-presence features with its sentiment label,
# producing (feature_dict, label) tuples, one per file id.
negfeats = [
    (word_feats(movie_reviews.words(fileids=[f])), 'neg')
    for f in negids
]
posfeats = [
    (word_feats(movie_reviews.words(fileids=[f])), 'pos')
    for f in posids
]
In [25]:
# Index at which each class is split: the first three quarters go to training.
# Floor division (//) keeps the cutoffs integers so they remain valid slice
# indices even when `/` means true division (Python 3, or Python 2 with
# `from __future__ import division`); on classic Python 2 the value is the same.
negcutoff = len(negfeats) * 3 // 4
poscutoff = len(posfeats) * 3 // 4
In [26]:
# Training set: everything before the cutoff in each class, negatives first.
trainfeats = negfeats[:negcutoff]
trainfeats += posfeats[:poscutoff]
# Test set: everything from the cutoff onward in each class, negatives first.
testfeats = negfeats[negcutoff:]
testfeats += posfeats[poscutoff:]
In [27]:
# Fit the Naive Bayes model on the training split.
nbmdl = NaiveBayesClassifier.train(trainfeats)
# Report accuracy on the held-out split. A parenthesised single-argument
# print produces the same output as the Python 2 print statement and is also
# valid as a function call on Python 3.
print('The accuracy is {}'.format(util.accuracy(nbmdl, testfeats)))
nbmdl.show_most_informative_features()
In [ ]: