In [38]:
import nltk.classify.util
from nltk.classify import NaiveBayesClassifier
from nltk.corpus import movie_reviews
from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

In [2]:
def create_word_features(words, stop_words=None):
    """Turn a sequence of tokens into the feature dict NLTK's Naive Bayes classifier expects.

    Every token that is not a stop word becomes a key mapped to ``True``
    (a "bag of words" presence feature). Duplicate tokens collapse into a
    single key; keys keep first-occurrence order.

    Args:
        words: Iterable of string tokens.
        stop_words: Optional iterable of tokens to drop. When omitted, the
            NLTK English stop-word list is used.

    Returns:
        dict mapping each retained token to ``True``.
    """
    if stop_words is None:
        # Fetch the list once per call. The original re-evaluated
        # stopwords.words("english") for every single token.
        stop_words = stopwords.words("english")
    # A set makes each membership test O(1) instead of a linear list scan.
    excluded = frozenset(stop_words)
    return {word: True for word in words if word not in excluded}

# Quick sanity check: stop words are dropped and the repeated token collapses to one key.
sample_tokens = ["the", "quick", "brown", "quick", "a", "fox"]
create_word_features(sample_tokens)


Out[2]:
{'brown': True, 'fox': True, 'quick': True}

In [3]:
# One (feature dict, label) pair per file in the corpus' 'neg' category.
neg_reviews = [
    (create_word_features(movie_reviews.words(review_id)), "negative")
    for review_id in movie_reviews.fileids('neg')
]

# Show one labelled example and the number of negative reviews collected.
print(neg_reviews[0])
print(len(neg_reviews))


({'point': True, 'chase': True, 'idea': True, 'wes': True, 'church': True, 'ending': True, 'us': True, 'personally': True, 'joblo': True, 'ways': True, 'came': True, 'one': True, 'despite': True, 'hide': True, 'bottom': True, 'someone': True, 'overall': True, 'runtime': True, 'drive': True, 'unravel': True, 'going': True, 'blair': True, 'explained': True, 'generation': True, 'go': True, 'ago': True, 'away': True, 've': True, 'member': True, 'problems': True, '&': True, 'secret': True, 'skip': True, 'write': True, 'hot': True, 'downshifts': True, 'lazy': True, 'coming': True, 'password': True, 'running': True, 'sitting': True, 'know': True, 'understanding': True, 'decent': True, 'mind': True, 'weird': True, 'redundant': True, 'deal': True, 'seem': True, 'decided': True, 'need': True, 'every': True, 'starts': True, '2': True, 'witch': True, 'didn': True, 'drink': True, 'confusing': True, 'entertaining': True, 'life': True, 'audience': True, 'dig': True, 'memento': True, '4': True, 'american': True, 'touches': True, 'taken': True, '3': True, 'teen': True, 'tons': True, 'feeling': True, 'minutes': True, 'giving': True, ')': True, 'explanation': True, 'crow': True, 'salvation': True, 'bentley': True, 'bit': True, 'head': True, 'dead': True, 'showing': True, 'production': True, 'wrapped': True, 'exact': True, 'watch': True, 'half': True, 'well': True, ',': True, 'meantime': True, '7': True, 'got': True, 'completely': True, 'mess': True, 'kind': True, 'stick': True, 'kids': True, 'playing': True, 'simply': True, 'seemed': True, 'obviously': True, 'would': True, 'feels': True, 'plain': True, 'make': True, 'highway': True, '?': True, 'people': True, 'might': True, '.': True, 'fantasy': True, 'strangeness': True, 'things': True, 'offering': True, 'big': True, 'studio': True, 'review': True, 'different': True, 'shelves': True, 'back': True, 'apparently': True, 'sagemiller': True, 'thrilling': True, 'disappearances': True, 'sure': True, 'director': True, 'really': True, 
'cool': True, 'dies': True, 'start': True, 'guys': True, 'makes': True, 'give': True, 'looooot': True, 'presents': True, 'line': True, 'unraveling': True, 'types': True, 'video': True, 'edge': True, '20': True, 'suits': True, 'strange': True, 'sad': True, '/': True, '(': True, 'concept': True, 'executed': True, '9': True, 'jumbled': True, '-': True, 'chasing': True, 'attempt': True, '"': True, 'flick': True, 'actors': True, 'clue': True, 'craziness': True, 'apparitions': True, 'generally': True, 'nightmare': True, 'folks': True, 'film': True, 'dreams': True, 'arrow': True, 'packaged': True, 'main': True, 'movie': True, 'plot': True, ':': True, 'even': True, 'want': True, 'genre': True, 'horror': True, 'neighborhood': True, 'guess': True, 'character': True, 'kudos': True, 'doesn': True, 'little': True, 'fed': True, 'echoes': True, 'chopped': True, 'entertain': True, 'visions': True, 'girlfriend': True, "'": True, 'continues': True, 'okay': True, 'making': True, 'music': True, 'years': True, '!': True, 'nightmares': True, 'somewhere': True, 'package': True, 'rarely': True, 'lost': True, 'beauty': True, 'critique': True, 'elm': True, 'part': True, 'actually': True, 'always': True, 'harder': True, 'sense': True, 'oh': True, 'figured': True, 'final': True, 'snag': True, 'like': True, 'scenes': True, 'assuming': True, 'fuck': True, 'took': True, 'also': True, 'mold': True, 'ever': True, 'since': True, 'entire': True, 'others': True, 'neat': True, 'whatever': True, 'world': True, 'see': True, 'shows': True, 'get': True, 'bad': True, 'good': True, 'stir': True, 'pretty': True, 'street': True, 'enter': True, 'find': True, 'terribly': True, 'way': True, 'couples': True, 'party': True, 'insight': True, 'applaud': True, 'characters': True, 'two': True, 'given': True, 'five': True, 'correctly': True, 'normal': True, 'excites': True, 'although': True, '8': True, '10': True, 'trying': True, 'flicks': True, 'turning': True, 'happen': True, 'holds': True, 'sorta': True, 'problem': 
True, 'look': True, 'slasher': True, 'throughout': True, 'movies': True, 'break': True, 'films': True, 'accident': True, 'still': True, 'biggest': True, 'seems': True, 'new': True, 'engaging': True, 'mean': True, 'melissa': True}, 'negative')
1000

In [4]:
# One (feature dict, label) pair per file in the corpus' 'pos' category.
pos_reviews = [
    (create_word_features(movie_reviews.words(review_id)), "positive")
    for review_id in movie_reviews.fileids('pos')
]

print(len(pos_reviews))


1000

In [5]:
# The first 750 reviews of each class are used for training; the rest are held out for testing.
split_at = 750
train_set = neg_reviews[:split_at] + pos_reviews[:split_at]
test_set = neg_reviews[split_at:] + pos_reviews[split_at:]
print(len(train_set), len(test_set))


1500 500

In [6]:
# Fit the Naive Bayes model on the (feature dict, label) training pairs.
classifier = NaiveBayesClassifier.train(labeled_featuresets=train_set)

In [7]:
# Fraction of held-out reviews whose predicted label matches the gold label.
accuracy = nltk.classify.util.accuracy(classifier, test_set)
accuracy_percent = accuracy * 100
print(accuracy_percent)


72.6

In [35]:
# Free-text review kept as a raw string; a later cell tokenises it and feeds
# it to the trained classifier as an input from outside the training corpus.
review_santa = '''

It would be impossible to sum up all the stuff that sucks about this film, so I'll break it down into what I remember most strongly: a man in an ingeniously fake-looking polar bear costume (funnier than the "bear" from Hercules in New York); an extra with the most unnatural laugh you're ever likely to hear; an ex-dope addict martian with tics; kid actors who make sure every syllable of their lines are slowly and caaarreee-fulll-yyy prrooo-noun-ceeed; a newspaper headline stating that Santa's been "kidnaped", and a giant robot. Yes, you read that right. A giant robot.

The worst acting job in here must be when Mother Claus and her elves have been "frozen" by the "Martians'" weapons. Could they be *more* trembling? I know this was the sixties and everyone was doped up, but still.
'''
# Echo the text so the reader can see exactly what will be classified.
print(review_santa )



It would be impossible to sum up all the stuff that sucks about this film, so I'll break it down into what I remember most strongly: a man in an ingeniously fake-looking polar bear costume (funnier than the "bear" from Hercules in New York); an extra with the most unnatural laugh you're ever likely to hear; an ex-dope addict martian with tics; kid actors who make sure every syllable of their lines are slowly and caaarreee-fulll-yyy prrooo-noun-ceeed; a newspaper headline stating that Santa's been "kidnaped", and a giant robot. Yes, you read that right. A giant robot.

The worst acting job in here must be when Mother Claus and her elves have been "frozen" by the "Martians'" weapons. Could they be *more* trembling? I know this was the sixties and everyone was doped up, but still.


In [39]:
# The training features are all lower-case (see the sample feature dict printed
# for neg_reviews[0]), so lower-case the text before tokenising. Otherwise
# capitalised tokens such as "The" or "Mother" slip past the stop-word filter
# and cannot match any feature the classifier was trained on.
santa_tokens = word_tokenize(review_santa.lower())
# Separate names for tokens and features, rather than reusing `words` for both.
santa_features = create_word_features(santa_tokens)
classifier.classify(santa_features)


Out[39]:
'negative'

In [40]:
# Free-text review kept as a raw string; a later cell tokenises it and feeds
# it to the trained classifier as an input from outside the training corpus.
review_spirit = '''
Spirited Away' is the first Miyazaki I have seen, but from this stupendous film I can tell he is a master storyteller. A hallmark of a good storyteller is making the audience empathise or pull them into the shoes of the central character. Miyazaki does this brilliantly in 'Spirited Away'. During the first fifteen minutes we have no idea what is going on. Neither does the main character Chihiro. We discover the world as Chihiro does and it's truly amazing to watch. But Miyazaki doesn't seem to treat this world as something amazing. The world is filmed just like our workaday world would. The inhabitants of the world go about their daily business as usual as full with apathy as us normal folks. Places and buildings are not greeted by towering establishing shots and majestic music. The fact that this place is amazing doesn't seem to concern Miyazaki.

What do however, are the characters. Miyazaki lingers upon the characters as if they were actors. He infixes his animated actors with such subtleties that I have never seen, even from animation giants Pixar. Twenty minutes into this film and I completely forgot these were animated characters; I started to care for them like they were living and breathing. Miyazaki treats the modest achievements of Chihiro with unashamed bombast. The uplifting scene where she cleanses the River God is accompanied by stirring music and is as exciting as watching gladiatorial combatants fight. Of course, by giving the audience developed characters to care about, the action and conflicts will always be more exciting, terrifying and uplifting than normal, generic action scenes. 
'''
# Echo the text so the reader can see exactly what will be classified.
print(review_spirit)


Spirited Away' is the first Miyazaki I have seen, but from this stupendous film I can tell he is a master storyteller. A hallmark of a good storyteller is making the audience empathise or pull them into the shoes of the central character. Miyazaki does this brilliantly in 'Spirited Away'. During the first fifteen minutes we have no idea what is going on. Neither does the main character Chihiro. We discover the world as Chihiro does and it's truly amazing to watch. But Miyazaki doesn't seem to treat this world as something amazing. The world is filmed just like our workaday world would. The inhabitants of the world go about their daily business as usual as full with apathy as us normal folks. Places and buildings are not greeted by towering establishing shots and majestic music. The fact that this place is amazing doesn't seem to concern Miyazaki.

What do however, are the characters. Miyazaki lingers upon the characters as if they were actors. He infixes his animated actors with such subtleties that I have never seen, even from animation giants Pixar. Twenty minutes into this film and I completely forgot these were animated characters; I started to care for them like they were living and breathing. Miyazaki treats the modest achievements of Chihiro with unashamed bombast. The uplifting scene where she cleanses the River God is accompanied by stirring music and is as exciting as watching gladiatorial combatants fight. Of course, by giving the audience developed characters to care about, the action and conflicts will always be more exciting, terrifying and uplifting than normal, generic action scenes. 


In [41]:
# The training features are all lower-case (see the sample feature dict printed
# for neg_reviews[0]), so lower-case the text before tokenising. Otherwise
# capitalised tokens such as "The" or "Miyazaki" slip past the stop-word filter
# and cannot match any feature the classifier was trained on.
spirit_tokens = word_tokenize(review_spirit.lower())
# Separate names for tokens and features, rather than reusing `words` for both.
spirit_features = create_word_features(spirit_tokens)
classifier.classify(spirit_features)


Out[41]:
'positive'

In [ ]: