In [14]:
import nltk
import random
from nltk.corpus import movie_reviews
from nltk.corpus import stopwords
import pickle

from nltk.classify.scikitlearn import SklearnClassifier
from sklearn.naive_bayes import GaussianNB, MultinomialNB, BernoulliNB

from sklearn.linear_model import LogisticRegression, SGDClassifier
from sklearn.svm import SVC, LinearSVC, NuSVC

In [2]:
stop_words = stopwords.words("english")
documents = [(list(movie_reviews.words(fileid)), category)
            for category in movie_reviews.categories()
             for fileid in movie_reviews.fileids(category)
            ]
random.shuffle(documents)

In [3]:
all_words = []
for w in movie_reviews.words():
    all_words.append(w.lower())

Making a frequency distribution of the words


In [4]:
all_words = nltk.FreqDist(all_words)
all_words.most_common(20)


Out[4]:
[(',', 77717),
 ('the', 76529),
 ('.', 65876),
 ('a', 38106),
 ('and', 35576),
 ('of', 34123),
 ('to', 31937),
 ("'", 30585),
 ('is', 25195),
 ('in', 21822),
 ('s', 18513),
 ('"', 17612),
 ('it', 16107),
 ('that', 15924),
 ('-', 15595),
 (')', 11781),
 ('(', 11664),
 ('as', 11378),
 ('with', 10792),
 ('for', 9961)]

In [5]:
feature_words = list(all_words.keys())[:5000]
def find_features(document):
    words = set(document)
    feature = {}
    for w in feature_words:
        feature[w] = (w in words)
    return feature

In [6]:
feature_sets = [(find_features(rev), category) for (rev, category) in documents]

In [7]:
feature_sets[:1]


Out[7]:
[({'honored': False,
   'squalor': False,
   'lattitude': False,
   'bruiser': False,
   'civilisation': False,
   'winstone': False,
   'overlays': False,
   'roadrunner': False,
   'compliments': False,
   'phlegmming': False,
   'farrow': False,
   'tranquility': False,
   'drowned': False,
   '90': False,
   'unpopular': False,
   'mandible': False,
   'hubbub': False,
   'gregory': False,
   'trini': False,
   'maclachlan': False,
   'beales': False,
   'coping': False,
   'customer': False,
   'extracting': False,
   'grumpiest': False,
   'teleprompted': False,
   'result': False,
   'saved': False,
   'coinciding': False,
   'democracy': False,
   'which': True,
   'wednesday': False,
   'reconcile': False,
   'divided': False,
   'suppoosed': False,
   'dizzy': False,
   'cute': False,
   'scout': False,
   'nevers': False,
   'likeness': False,
   'yourself': False,
   'harvey': False,
   'romanov': False,
   'speculations': False,
   'mucus': False,
   'dropper': False,
   'lhermite': False,
   'entourage': False,
   'gio': False,
   'profoundly': False,
   'ibenez': False,
   'excerpted': False,
   'creature': False,
   'festive': False,
   'prospecting': False,
   'landscape': False,
   'carrefour': False,
   'dianne': False,
   'claustral': False,
   'amulet': False,
   'foregrounded': False,
   'avenger': False,
   'knuckle': False,
   'lotta': False,
   'banned': False,
   'civil': False,
   'moira': False,
   'masahitko': False,
   'candy': False,
   'wonderland': False,
   'blunder': False,
   'costing': False,
   'tit': False,
   'forceful': False,
   'hogarth': False,
   'dies': False,
   'boner': False,
   'indifference': False,
   'durham': False,
   'independence': False,
   'alvin': False,
   'gowns': False,
   'historian': False,
   'undead': False,
   'contain': True,
   'ruled': False,
   'loved': False,
   'helmet': False,
   'flow': False,
   'madison': False,
   'expense': False,
   'seatbelts': False,
   'moranis': False,
   'senses': False,
   'urges': False,
   'margins': False,
   'continent': False,
   'stepahne': False,
   'nightlife': False,
   'secretaries': False,
   'swimming': False,
   'limey': False,
   'putty': False,
   'precinct': False,
   'larch': False,
   'blotted': False,
   'slattery': False,
   'faulty': False,
   'sacred': False,
   'referenced': False,
   'watchabe': False,
   'laced': False,
   'marker': False,
   'crystal': False,
   'execution': False,
   'boomer': False,
   'ey': False,
   'benigni': False,
   'plan': False,
   'immediately': False,
   'persuasive': False,
   'tamuera': False,
   'possessions': False,
   'suggestiveness': False,
   'postmodernism': False,
   'appeals': False,
   'alleviates': False,
   'infallible': False,
   'couldn': False,
   'beowolf': False,
   'morgan': False,
   'jew': False,
   'langer': False,
   'progressing': False,
   'photographer': False,
   'wrestled': False,
   'confirmed': False,
   'locally': False,
   'linklater': False,
   'arlo': False,
   'carve': False,
   '_full_house_': False,
   'navigated': False,
   'bahamas': False,
   'handles': False,
   'emo': False,
   'tediousness': False,
   'amass': False,
   'disinterested': False,
   'masturbator': False,
   'torches': False,
   'overstay': False,
   'noticed': False,
   'visually': False,
   'pattern': False,
   'coffin': False,
   'dark': False,
   'aragon': False,
   'frustrated': False,
   'deference': False,
   'stricter': False,
   'hors': False,
   'overflying': False,
   'dunne': False,
   'perturbed': False,
   'pedal': False,
   'joyride': False,
   'courtship': False,
   'pardon': False,
   'believable': True,
   'nobleman': False,
   'violins': False,
   'safety': False,
   'hatchett': False,
   'gruffudd': False,
   'hooey': False,
   'embracing': False,
   'gravedigging': False,
   'neurological': False,
   'deliberately': False,
   'mothballs': False,
   'deprecating': False,
   '1913': False,
   'peddle': False,
   'tiering': False,
   'cloaks': False,
   'awoke': False,
   'rongguang': False,
   'thug': False,
   'ingrained': False,
   'horrifies': False,
   'nuke': False,
   'employees': False,
   'bounty': False,
   'bread': False,
   'prepare': False,
   'overboard': False,
   'stubbornly': False,
   'shapeshifting': False,
   'colisseum': False,
   'luchini': False,
   'buketer': False,
   'criterion': False,
   'congolese': False,
   'useless': False,
   'inimitable': False,
   'signifcance': False,
   'tarlov': False,
   'na': False,
   'vylette': False,
   'barber': False,
   'sprinkle': False,
   'echo': False,
   'body': False,
   'mcalisters': False,
   'deflated': False,
   'finchers': False,
   'eqsuisite': False,
   'breakdancing': False,
   'usage': False,
   'ambiguously': False,
   'lifeforce': False,
   'compilation': False,
   'ugliness': False,
   'equal': False,
   'cronos': False,
   'goblins': False,
   'murderer': False,
   'lithium': False,
   'defends': False,
   'subdesarrollo': False,
   'mandoki': False,
   'yorkers': False,
   'runners': False,
   'metro': False,
   'values': False,
   'memories': False,
   'soleil': False,
   'veinotte': False,
   'confusedly': False,
   'troop': False,
   'nerdy': False,
   'oozed': False,
   'abortions': False,
   'ontop': False,
   'carlin': False,
   'decorating': False,
   'businessmen': False,
   'methodical': False,
   'bombastically': False,
   'resurface': False,
   'void': False,
   'uneven': False,
   'mutters': False,
   'mission': False,
   'participants': False,
   'immaculate': False,
   'turner': False,
   'scoffed': False,
   'ghoul': False,
   'shove': False,
   'sipping': False,
   'koolaid': False,
   'key': False,
   'polo': False,
   'insubordination': False,
   'cops': False,
   'trevor': False,
   '_54_': False,
   'corrupter': False,
   'manicured': False,
   'sweeney': False,
   'turnabout': False,
   'wisecracking': False,
   'bravura': False,
   'prospector': False,
   'bevy': False,
   'inviting': False,
   'horseman': False,
   'bane': False,
   'nighttime': False,
   'typecast': False,
   'prophets': False,
   'veterans': False,
   'identity': False,
   'heralding': False,
   'lehman': False,
   'hunka': False,
   'damage': False,
   '1922': False,
   'conveniences': False,
   'glowering': False,
   'zephyr': False,
   'albanian': False,
   'incredulously': False,
   'cong': False,
   'neighing': False,
   'success': False,
   'sleepers': False,
   'warfare': False,
   'seamy': False,
   'vie': False,
   'mugs': False,
   'vy': False,
   'roswell': False,
   'walked': False,
   'garicia': False,
   'resorting': False,
   'clooney': False,
   'hashish': False,
   'lifeform': False,
   'collage': False,
   'ilah': False,
   'dario': False,
   'squirrel': False,
   'molester': False,
   'tidy': False,
   'tackling': False,
   'uncompleted': False,
   'incite': False,
   'sprinkling': False,
   'labeling': False,
   'sizzles': False,
   'bluer': False,
   'attracting': False,
   'deposited': False,
   'maniacally': False,
   'pollutes': False,
   'narration': False,
   'raid': False,
   'campion': False,
   'riddler': False,
   'flatulent': False,
   'holders': False,
   'conclude': False,
   'santiago': False,
   'ashley': False,
   'johansson': False,
   'tidal': False,
   'podrace': False,
   'corporate': False,
   'ferland': False,
   'alecky': False,
   'shadings': False,
   'deride': False,
   'courts': False,
   'hackman': False,
   'arrested': False,
   'hoodlums': False,
   'flush': False,
   'infuriatingly': False,
   'arne': False,
   'geez': False,
   'cares': False,
   'akiva': False,
   'reformed': False,
   'motherly': False,
   '$@^@': False,
   'fourth': False,
   'darby': False,
   'melodic': False,
   'vengeful': False,
   'idolizing': False,
   'whiskey': False,
   'dialects': False,
   'surreal': False,
   'duane': False,
   'ironed': False,
   'cui': False,
   'chortle': False,
   'prospers': False,
   'rug': False,
   'televisions': False,
   'competing': False,
   'petula': False,
   'milky': False,
   'cassini': False,
   'driver': False,
   'mediating': False,
   'stifler': False,
   'privates': False,
   'gymnastics': False,
   'injection': False,
   'petey': False,
   'dutifully': False,
   'cannibal': False,
   'asking': False,
   'cutthroat': False,
   'handedly': False,
   'multiply': False,
   'brute': False,
   'filing': False,
   'envying': False,
   'contantly': False,
   'boddy': False,
   'kuzco': False,
   '2056': False,
   'infrequent': False,
   'stroke': False,
   'zak': False,
   'note': False,
   'consigned': False,
   'superhuman': False,
   'colorful': False,
   'hohh': False,
   'honorable': False,
   'footage': False,
   'branch': False,
   'cloudy': False,
   'reek': False,
   'northmen': False,
   'ling': False,
   'kari': False,
   'shmaltzy': False,
   'flippancy': False,
   'weaponesque': False,
   'duffel': False,
   'eccentricities': False,
   'guerra': False,
   'dokken': False,
   'bleibtreu': False,
   'din': False,
   'danced': False,
   'right': False,
   'witnesses': False,
   'bustling': False,
   'ned': False,
   'demands': False,
   'profession': False,
   'stems': False,
   'represents': False,
   'thieves': False,
   'stretching': False,
   'bard': False,
   'boingo': False,
   'snarf': False,
   'loft': False,
   'pouncing': False,
   'persuading': False,
   'grittiest': False,
   'suspends': False,
   '_very_': False,
   'lives': False,
   'techniques': True,
   'iq': False,
   'horseback': False,
   'nonentity': False,
   'barbieri': False,
   'warp': False,
   'microsoft': False,
   'pisspoor': False,
   'fingal': False,
   'prompted': False,
   'yokel': False,
   'ordinaryness': False,
   'flasher': False,
   'sane': False,
   'stonecutters': False,
   'agonizingly': False,
   'aime': False,
   'bedpost': False,
   'pin': False,
   'capitalize': False,
   'repulsive': False,
   'battlefields': False,
   'resuce': False,
   'developing': False,
   'robotically': False,
   'massacering': False,
   'afternoons': False,
   'dump': False,
   'fletch': False,
   'porch': False,
   'animated': False,
   'worshiping': False,
   'knees': False,
   'travelling': False,
   'repaint': False,
   'booked': False,
   'ceiling': False,
   'clinging': False,
   'kaela': False,
   'scheider': False,
   'evangelist': False,
   'dollar': False,
   'caerthan': False,
   'gudin': False,
   'transports': False,
   'guilifoyle': False,
   'harumph': False,
   '1500s': False,
   'unemployed': False,
   'glides': False,
   'warmers': False,
   'blaster': False,
   'radio': False,
   'favourite': False,
   'materialism': False,
   'unknowns': False,
   'tangents': False,
   'riches': False,
   'chubby': False,
   'envy': False,
   'deriding': False,
   'sired': False,
   'zellwegger': False,
   'lachman': False,
   'klumps': False,
   'colonists': False,
   'refrained': False,
   'eliel': False,
   'herman': False,
   'personalizes': False,
   'gazon': False,
   'op': False,
   'clutterbuck': False,
   'adaption': False,
   'fine': False,
   'informs': False,
   'clandestine': False,
   'hawaii': False,
   'townspeople': False,
   'fifths': False,
   'attachs': False,
   'assurance': False,
   'bullets': False,
   'vines': False,
   'ogden': False,
   'pows': False,
   'condemnation': False,
   'woodard': False,
   'amusement': False,
   'magruder': False,
   'approach': False,
   'massaging': False,
   'persuasions': False,
   'maglietta': False,
   'sake': False,
   'amok': False,
   'cogent': False,
   'doer': False,
   'lowlifes': False,
   'rat': False,
   'shift': False,
   'cachet': False,
   'glamorising': False,
   'interacted': False,
   'costa': False,
   'electrical': False,
   'puccini': False,
   'santas': False,
   'chapters': False,
   'flips': False,
   'ips': False,
   'disagreed': False,
   'midly': False,
   'internally': False,
   'nil': False,
   'graystoke': False,
   'nihilist': False,
   'delpech': False,
   'breakthrough': False,
   'stunts': False,
   'favorties': False,
   'mildly': False,
   'glenne': False,
   'suspects': False,
   'adheres': False,
   'vaguest': False,
   'juni': False,
   'injustices': False,
   'koepp': False,
   'pictures': False,
   'uphill': False,
   'smuggler': False,
   'alligator': False,
   'sito': False,
   'laughless': False,
   'juncture': False,
   'vertiginous': False,
   'complete': False,
   'afeminite': False,
   'crouches': False,
   'norbu': False,
   'trap': False,
   'shirase': False,
   'eroticism': False,
   'alfre': False,
   'cleaned': False,
   'decaying': False,
   'animatronics': False,
   'backstabbing': False,
   'surprises': False,
   'plank': False,
   'whos': False,
   'base': False,
   'dolittle': False,
   'julia': False,
   'permits': False,
   'shellulite': False,
   'tormentor': False,
   'farts': False,
   'rapport': False,
   'doa': False,
   'humoring': False,
   'textured': False,
   'oumph': False,
   'retrieve': False,
   'keg': False,
   'nuzzling': False,
   'oilrig': False,
   'postulated': False,
   'reappear': False,
   'homosexuality': False,
   'lear': False,
   'lennix': False,
   'keyed': False,
   'organisations': False,
   'antonia': False,
   'differentiates': False,
   'depression': False,
   'mazursky': False,
   'poe': False,
   'hypochondriac': False,
   'recommendation': False,
   'inaction': False,
   'earmarks': False,
   'gto': False,
   'interrupting': False,
   'flinstone': False,
   'hanoi': False,
   'initializes': False,
   'slickster': False,
   'macbeth': False,
   'virulent': False,
   'chaney': False,
   'shy': False,
   'normally': False,
   'spoilers': False,
   'nebulous': False,
   'cretaceous': False,
   '104': False,
   'synthesized': False,
   'bunny': False,
   'goyer': False,
   'jumble': False,
   'colliding': False,
   '16mm': False,
   'interrogative': False,
   'synth': False,
   'shrieking': False,
   'haney': False,
   'balls': False,
   '_the_fugitive_': False,
   'islands': False,
   'gentlemen': False,
   'ruler': False,
   'pours': False,
   'unfolding': False,
   'unfazed': False,
   'hanged': False,
   'boiling': False,
   'showgirls': False,
   'eiko': False,
   'unglamorous': False,
   'inteligence': False,
   'ugh': False,
   'projectionist': False,
   'frauds': False,
   'overlong': False,
   'counfound': False,
   'pizzas': False,
   'irs': False,
   'inquisition': False,
   'benning': False,
   'rocque': False,
   'gillian': False,
   'heed': False,
   'gabe': False,
   'transsexual': False,
   'grimly': False,
   'hope': False,
   'pet': False,
   '75': False,
   'valour': False,
   'saccharine': False,
   'craftsman': False,
   'massages': False,
   'guarded': False,
   'falter': False,
   'democratic': False,
   'infestation': False,
   'whovier': False,
   'dimesional': False,
   'rossellinia': False,
   'concert': False,
   'schlubby': False,
   'pound': False,
   'abysmal': False,
   'plunged': False,
   'meticulousness': False,
   'degenerates': False,
   'beautiful': False,
   'precariously': False,
   'longshanks': False,
   'applied': False,
   'matured': False,
   'raven': False,
   'hutton': False,
   'feely': False,
   'crewmate': False,
   'henning': False,
   'mischevious': False,
   'merpeople': False,
   'brokedown': False,
   'outskirts': False,
   'coburn': False,
   'truck': False,
   'blackwolf': False,
   'picking': False,
   'anthropological': False,
   'disillusionment': False,
   'grammy': False,
   'reappearance': False,
   'baddies': False,
   'exhibited': False,
   'cleese': False,
   '_amadeus_': False,
   'affect': False,
   '666': False,
   'mayor': False,
   'joey': False,
   'delectable': False,
   'jp2': False,
   'bad': False,
   'pregnancy': False,
   'landwalker': False,
   'contenders': False,
   'waltzing': False,
   'bandwagon': False,
   'pre': False,
   'burr': False,
   'ahem': False,
   '35': False,
   'espoused': False,
   'mademoiselle': False,
   'visage': False,
   'guerrilla': False,
   'overdoes': False,
   'betray': False,
   'tieing': False,
   'unshrouded': False,
   'mozell': False,
   'retaining': False,
   'corrupted': False,
   'dads': False,
   'playtone': False,
   'barns': False,
   'despise': False,
   'gretta': False,
   'dismissed': False,
   'steals': False,
   'enabling': False,
   'omnivorous': False,
   'expounding': False,
   'situated': False,
   'unconventionality': False,
   'cain': False,
   'vantages': False,
   'yitzak': False,
   '========================': False,
   'apprehensively': False,
   'demonstrations': False,
   'concocting': False,
   'drivin': False,
   'sisterhood': False,
   'feedback': False,
   'smiled': False,
   'indebted': False,
   'creditors': False,
   'timothy': False,
   'scrooge': False,
   'roughly': False,
   'clyde': False,
   'derides': False,
   'lisping': False,
   'gauge': False,
   'buttercup': False,
   'ahab': False,
   'ramshackle': False,
   'miscasting': False,
   'aurora': False,
   'simm': False,
   'masculine': False,
   'starve': False,
   'flirtation': False,
   'catsuit': False,
   'hubbard': False,
   'intertwining': False,
   'corners': False,
   'currents': False,
   'cataloguing': False,
   'tretiak': False,
   'ironically': False,
   'web': False,
   'seing': False,
   'searing': False,
   'surfer': False,
   'assures': False,
   'earful': False,
   'quickening': False,
   'jung': False,
   'through': True,
   'wittily': False,
   'roofs': False,
   'chevalier': False,
   'taxed': False,
   'tubercular': False,
   'beeper': False,
   'protocol': False,
   'boyishly': False,
   'embarrsingly': False,
   'olden': False,
   'placards': False,
   'padre': False,
   'passed': False,
   'wiesz': False,
   'gratuities': False,
   'privy': False,
   'farquaads': False,
   'molly': False,
   '115': False,
   '=': False,
   'repetition': False,
   'exam': False,
   'outings': False,
   'orlando': False,
   'revenge': False,
   'cutely': False,
   'rocketry': False,
   'tyrone': False,
   'builds': False,
   'shariff': False,
   'operational': False,
   'trademarks': False,
   'shebang': False,
   'declines': False,
   'coughlan': False,
   'unacceptable': False,
   'crunchable': False,
   'awkwardly': False,
   'metamorphosizes': False,
   'womanfriend': False,
   'urinary': False,
   'dramatically': False,
   'confronting': False,
   'beachwalkers': False,
   'fs': False,
   'enterprises': False,
   'prejudice': False,
   'parka': False,
   'intonation': False,
   'dimetrodon': False,
   'virility': False,
   '1948': False,
   'slathers': False,
   'added': False,
   'clever': True,
   'seperate': False,
   'unwritten': False,
   'abandoned': False,
   'nascar': False,
   'quality': False,
   'jeter': False,
   'desparation': False,
   'bellamy': False,
   'fascist': False,
   'existentialist': False,
   'overlay': False,
   'herve': False,
   'circus': False,
   'grunt': False,
   'deluise': False,
   'eliminate': False,
   'supported': False,
   'summations': False,
   'manners': False,
   'trippier': False,
   'garret': False,
   'adversary': False,
   'support': False,
   'colqhoun': False,
   'recruitment': False,
   'clearer': False,
   '_i_know_what_you_did_last_summer_': False,
   'sunbathes': False,
   'shield': False,
   'prwhen': False,
   'representations': False,
   'slays': False,
   'corey': False,
   'fundamentalists': False,
   'geese': False,
   'postcard': False,
   'siouxsie': False,
   'suicidal': False,
   'dreyer': False,
   'pual': False,
   'mouthpieces': False,
   'delaware': False,
   'onegin': False,
   'hurls': False,
   'anchor': False,
   'lori': False,
   'ladybug': False,
   'abruptly': False,
   'schiffer': False,
   'burton': False,
   'swope': False,
   'slip': False,
   'general': False,
   'malevolent': False,
   'obliviousness': False,
   'glare': False,
   'fatal': False,
   'variants': False,
   'intrigue': False,
   'caine': False,
   'insulated': False,
   '56k': False,
   'discerning': False,
   'nice': False,
   'dismembered': False,
   'includes': False,
   'sweden': False,
   'slop': False,
   'mentions': False,
   'sentimentally': False,
   'equals': False,
   'purse': False,
   'dub': False,
   'seeds': False,
   'saintly': False,
   'captivating': False,
   'jamaica': False,
   'vietnam': False,
   'mt': False,
   'avoiding': False,
   'declan': False,
   'verne': False,
   'helms': False,
   'theirs': False,
   'breathtaking': False,
   '_people_': False,
   'gotten': False,
   'wong': False,
   'cheery': False,
   'archaeology': False,
   'recalling': False,
   'reared': False,
   'adoring': False,
   'dylan': False,
   'feds': False,
   'justice': False,
   'joff': False,
   'squad': False,
   'embarrased': False,
   'trial': False,
   'settings': False,
   'partnership': False,
   'unprofessionalism': False,
   'searching': False,
   'mourned': False,
   'comedy': True,
   'timbre': False,
   'acceptable': False,
   'juilliard': False,
   'satistfy': False,
   'fry': False,
   'unlovable': False,
   'ninjaman': False,
   'groaning': False,
   'detracts': False,
   'discovers': False,
   'hahaha': False,
   'reflex': False,
   'rowdy': False,
   'burbano': False,
   'churchman': False,
   'aristocrat': False,
   'achieving': False,
   'unimpressive': False,
   'edd': False,
   'aykroyd': False,
   'directionless': False,
   'balbricker': False,
   'sequences': False,
   'shoe': False,
   'sway': False,
   'suit': False,
   ...},
  'pos')]

Training the classifier


In [8]:
training_set = feature_sets[:1900]
testing_set = feature_sets[1900:]

In [18]:
## TO-DO: To build own naive bais algorithm
# classifier = nltk.NaiveBayesClassifier.train(training_set)

MNB_classifier = SklearnClassifier(MultinomialNB())
MNB_classifier.train(training_set)

## saving it in a pickle
MNB_pickle = open("MNB_pickle.pickle", "wb")
pickle.dump(MNB_classifier, MNB_pickle)
MNB_pickle.close()
print("Multinomial classifier accuracy : ", (nltk.classify.accuracy(MNB_classifier, testing_set))*100)


Multinomial classifier accuracy :  75.0

In [19]:
## BernoulliNB 

BNB_classifier = SklearnClassifier(BernoulliNB())
BNB_classifier.train(training_set)

BNB_pickle = open("BNB_pickle.pickle", "wb")
pickle.dump(BNB_classifier, BNB_pickle)
BNB_pickle.close()

print("Bernoulli classifier accuracy : ", (nltk.classify.accuracy(BNB_classifier, testing_set))*100)


Bernoulli classifier accuracy :  71.0

In [21]:
LogisticRegression_classifier = SklearnClassifier(LogisticRegression())
LogisticRegression_classifier.train(training_set)

LogisticRegression_pickle = open("LogisticRegression.pickle", "wb")
pickle.dump(LogisticRegression_classifier, LogisticRegression_pickle)
LogisticRegression_pickle.close()

print("LogisticRegression_classifier accuracy percent:", (nltk.classify.accuracy(LogisticRegression_classifier, testing_set))*100)

SGDClassifier_classifier = SklearnClassifier(SGDClassifier())
SGDClassifier_classifier.train(training_set)

SGDClassifier_pickle = open("SGDClassifier.pickle", "wb")
pickle.dump(SGDClassifier_classifier, SGDClassifier_pickle)
SGDClassifier_pickle.close()

print("SGDClassifier_classifier accuracy percent:", (nltk.classify.accuracy(SGDClassifier_classifier, testing_set))*100)

SVC_classifier = SklearnClassifier(SVC())
SVC_classifier.train(training_set)

SVC_classifier_pickle = open("SVC_classifier.pickle", "wb")
pickle.dump(SVC_classifier, SVC_classifier_pickle)
SVC_classifier_pickle.close()

print("SVC_classifier accuracy percent:", (nltk.classify.accuracy(SVC_classifier, testing_set))*100)

LinearSVC_classifier = SklearnClassifier(LinearSVC())
LinearSVC_classifier.train(training_set)

LinearSVC_pickle = open("LinearSVC.pickle", "wb")
pickle.dump(LinearSVC_classifier, LinearSVC_pickle)
LinearSVC_pickle.close()

print("LinearSVC_classifier accuracy percent:", (nltk.classify.accuracy(LinearSVC_classifier, testing_set))*100)

NuSVC_classifier = SklearnClassifier(NuSVC())
NuSVC_classifier.train(training_set)

NuSVC_pickle = open("LinearSVC.pickle", "wb")
pickle.dump(NuSVC_classifier, NuSVC_pickle)
NuSVC_pickle.close()

print("NuSVC_classifier accuracy percent:", (nltk.classify.accuracy(NuSVC_classifier, testing_set))*100)


LogisticRegression_classifier accuracy percent: 65.0
SGDClassifier_classifier accuracy percent: 67.0
SVC_classifier accuracy percent: 46.0
LinearSVC_classifier accuracy percent: 66.0
NuSVC_classifier accuracy percent: 65.0

In [22]:
### using the old naive_bayes classifier
naive_bayes_pickle = open("naive_bayes.pickle", "rb")
naive_bayes_classifier = pickle.load(naive_bayes_pickle)
naive_bayes_pickle.close()

print("Naive bayes classifier accuracy percent:", (nltk.classify.accuracy(naive_bayes_classifier, testing_set))*100)


Naive bayes classifier accuracy percent: 70.0

Putting it all together to make a voting system for increasing accuracy