In [0]:
import tensorflow as tf
from tensorflow import keras
import numpy as np

In [0]:
dataset = keras.datasets.imdb

In [0]:
(train_data,train_labels), (test_data,test_labels) = dataset.load_data()
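
Each review is a list of word indices, and `load_data()` with no arguments keeps the full vocabulary. A common variant, shown here as a sketch only and not used by the rest of this notebook, caps the vocabulary at the most frequent words:

In [0]:
# Alternative (not used below): keep only the 10,000 most frequent words;
# rarer words are replaced by the out-of-vocabulary index. Stored under
# different names so the full-vocabulary data above is untouched.
(small_train, small_train_labels), (small_test, small_test_labels) = dataset.load_data(num_words=10000)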

In [15]:
train_data.shape


Out[15]:
(25000,)

In [16]:
test_data.shape


Out[16]:
(25000,)
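
The labels are binary: 0 for a negative review, 1 for a positive one. A quick sanity check, not part of the original run:

In [0]:
# The targets should contain exactly the two classes 0 and 1.
np.unique(train_labels)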

In [0]:
word_index = dataset.get_word_index()

In [18]:
word_index


Out[18]:
{'fawn': 34701,
 'tsukino': 52006,
 'woods': 1408,
 'hanging': 2345,
 'woody': 2289,
 'spiders': 16115,
 'want': 178,
 'wrong': 352,
 'effects': 299,
 'much': 73,
 'had': 66,
 'has': 44,
 ...}

In [0]:
word_index = {k: (v+3) for k, v in word_index.items()}  # shift every index up by 3 to reserve 0-3 for special tokens

In [0]:
word_index["<PAD>"] = 0

In [0]:
word_index["<PAD>"]

In [0]:
train_data[0]

In [0]:
word_index["<START>"] = 1
word_index["<UNK>"] = 2  # unknown
word_index["<UNUSED>"] = 3
reverse_word_index = dict([(value,key) for (key,value) in word_index.items()])

In [0]:
def decode_review(text):
  return ' '.join([reverse_word_index.get(i,'?') for i in text])

In [22]:
decode_review(train_data[0])


Out[22]:
"<START> this film was just brilliant casting location scenery story direction everyone's really suited the part they played and you could just imagine being there robert redford's is an amazing actor and now the same being director norman's father came from the same scottish island as myself so i loved the fact there was a real connection with this film the witty remarks throughout the film were great it was just brilliant so much that i bought the film as soon as it was released for retail and would recommend it to everyone to watch and the fly fishing was amazing really cried at the end it was so sad and you know what they say if you cry at a film it must have been good and this definitely was also congratulations to the two little boy's that played the part's of norman and paul they were just brilliant children are often left out of the praising list i think because the stars that play them all grown up are such a big profile for the whole film but these children are amazing and should be praised for what they have done don't you think the whole story was so lovely because it was true and was someone's life after all that was shared with us all"

In [0]:
# A dictionary mapping words to an integer index
word_index = dataset.get_word_index()

# The first indices are reserved
word_index = {k:(v+3) for k,v in word_index.items()} 
word_index["<PAD>"] = 0
word_index["<START>"] = 1
word_index["<UNK>"] = 2  # unknown
word_index["<UNUSED>"] = 3

reverse_word_index = {value: key for (key, value) in word_index.items()}

def decode_review(text):
    return ' '.join([reverse_word_index.get(i, '?') for i in text])

In [0]:
decode_review(train_data[0])


Out[0]:
"<START> this film was just brilliant casting location scenery story direction everyone's really suited the part they played and you could just imagine being there robert redford's is an amazing actor and now the same being director norman's father came from the same scottish island as myself so i loved the fact there was a real connection with this film the witty remarks throughout the film were great it was just brilliant so much that i bought the film as soon as it was released for retail and would recommend it to everyone to watch and the fly fishing was amazing really cried at the end it was so sad and you know what they say if you cry at a film it must have been good and this definitely was also congratulations to the two little boy's that played the part's of norman and paul they were just brilliant children are often left out of the praising list i think because the stars that play them all grown up are such a big profile for the whole film but these children are amazing and should be praised for what they have done don't you think the whole story was so lovely because it was true and was someone's life after all that was shared with us all"

In [0]:
train_data = keras.preprocessing.sequence.pad_sequences(train_data,
                                                        value=word_index["<PAD>"],
                                                        padding='post',
                                                        maxlen=256)

test_data = keras.preprocessing.sequence.pad_sequences(test_data,
                                                       value=word_index["<PAD>"],
                                                       padding='post',
                                                       maxlen=256)
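
After padding, every review has the same length, so each split is a dense (25000, 256) array that can be fed straight to the model. A quick verification, as a sketch:

In [0]:
# Both splits should now be rectangular arrays of length-256 sequences.
print(train_data.shape, test_data.shape)
print(len(train_data[0]), len(train_data[1]))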

In [39]:
vocab_size = 100000  # must exceed the largest word index; the full IMDB vocabulary (~88,000 words plus the +3 offset) fits under this

model = keras.Sequential()
model.add(keras.layers.Embedding(vocab_size, 16))
model.add(keras.layers.GlobalAveragePooling1D())
model.add(keras.layers.Dense(16, activation=tf.nn.relu))
model.add(keras.layers.Dense(1, activation=tf.nn.sigmoid))

model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
embedding_3 (Embedding)      (None, None, 16)          1600000   
_________________________________________________________________
global_average_pooling1d_3 ( (None, 16)                0         
_________________________________________________________________
dense_6 (Dense)              (None, 16)                272       
_________________________________________________________________
dense_7 (Dense)              (None, 1)                 17        
=================================================================
Total params: 1,600,289
Trainable params: 1,600,289
Non-trainable params: 0
_________________________________________________________________
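
`GlobalAveragePooling1D` averages the embedding vectors over the sequence dimension, turning a `(batch, timesteps, 16)` tensor into `(batch, 16)` regardless of review length. A NumPy sketch of the same operation, illustrative only:

In [0]:
# Averaging a (batch, timesteps, features) tensor over the time axis
# reproduces what GlobalAveragePooling1D does to the embedding output.
x = np.random.rand(2, 256, 16)   # two fake "reviews" of 256 embedded words
pooled = x.mean(axis=1)          # shape (2, 16)
pooled.shape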

In [0]:
model.compile(optimizer=tf.train.AdamOptimizer(),
              loss='binary_crossentropy',
              metrics=['accuracy'])
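
`tf.train.AdamOptimizer` is the TensorFlow 1.x optimizer class; on TensorFlow 2 / standalone Keras the equivalent compile step would look like this (sketch, not run here):

In [0]:
# TF 2.x equivalent of the compile call above (not executed in this notebook).
model.compile(optimizer=keras.optimizers.Adam(),
              loss='binary_crossentropy',
              metrics=['accuracy'])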

In [0]:
x_val = train_data[:10000]
partial_x_train = train_data[10000:]

y_val = train_labels[:10000]
partial_y_train = train_labels[10000:]

In [42]:
history = model.fit(partial_x_train,
                    partial_y_train,
                    epochs=40,
                    batch_size=1024,
                    validation_data=(x_val, y_val),
                    verbose=1)


Train on 15000 samples, validate on 10000 samples
Epoch 1/40
15000/15000 [==============================] - 1s 93us/step - loss: 0.6924 - acc: 0.5355 - val_loss: 0.6911 - val_acc: 0.6015
Epoch 2/40
15000/15000 [==============================] - 1s 70us/step - loss: 0.6891 - acc: 0.6336 - val_loss: 0.6874 - val_acc: 0.7201
Epoch 3/40
15000/15000 [==============================] - 1s 71us/step - loss: 0.6838 - acc: 0.7521 - val_loss: 0.6818 - val_acc: 0.7514
Epoch 4/40
15000/15000 [==============================] - 1s 71us/step - loss: 0.6760 - acc: 0.7896 - val_loss: 0.6740 - val_acc: 0.7690
Epoch 5/40
15000/15000 [==============================] - 1s 72us/step - loss: 0.6652 - acc: 0.8043 - val_loss: 0.6631 - val_acc: 0.7681
Epoch 6/40
15000/15000 [==============================] - 1s 71us/step - loss: 0.6506 - acc: 0.8088 - val_loss: 0.6491 - val_acc: 0.7821
Epoch 7/40
15000/15000 [==============================] - 1s 71us/step - loss: 0.6321 - acc: 0.8187 - val_loss: 0.6317 - val_acc: 0.7820
Epoch 8/40
15000/15000 [==============================] - 1s 69us/step - loss: 0.6098 - acc: 0.8264 - val_loss: 0.6115 - val_acc: 0.7936
Epoch 9/40
15000/15000 [==============================] - 1s 71us/step - loss: 0.5840 - acc: 0.8362 - val_loss: 0.5890 - val_acc: 0.7991
Epoch 10/40
15000/15000 [==============================] - 1s 70us/step - loss: 0.5559 - acc: 0.8431 - val_loss: 0.5651 - val_acc: 0.8109
Epoch 11/40
15000/15000 [==============================] - 1s 71us/step - loss: 0.5263 - acc: 0.8525 - val_loss: 0.5406 - val_acc: 0.8191
Epoch 12/40
15000/15000 [==============================] - 1s 71us/step - loss: 0.4963 - acc: 0.8631 - val_loss: 0.5160 - val_acc: 0.8222
Epoch 13/40
15000/15000 [==============================] - 1s 71us/step - loss: 0.4661 - acc: 0.8737 - val_loss: 0.4923 - val_acc: 0.8316
Epoch 14/40
15000/15000 [==============================] - 1s 71us/step - loss: 0.4372 - acc: 0.8817 - val_loss: 0.4698 - val_acc: 0.8385
Epoch 15/40
15000/15000 [==============================] - 1s 72us/step - loss: 0.4094 - acc: 0.8905 - val_loss: 0.4488 - val_acc: 0.8456
Epoch 16/40
15000/15000 [==============================] - 1s 73us/step - loss: 0.3832 - acc: 0.8973 - val_loss: 0.4300 - val_acc: 0.8493
Epoch 17/40
15000/15000 [==============================] - 1s 70us/step - loss: 0.3590 - acc: 0.9053 - val_loss: 0.4118 - val_acc: 0.8551
Epoch 18/40
15000/15000 [==============================] - 1s 71us/step - loss: 0.3362 - acc: 0.9120 - val_loss: 0.3960 - val_acc: 0.8589
Epoch 19/40
15000/15000 [==============================] - 1s 70us/step - loss: 0.3155 - acc: 0.9158 - val_loss: 0.3819 - val_acc: 0.8626
Epoch 20/40
15000/15000 [==============================] - 1s 72us/step - loss: 0.2964 - acc: 0.9211 - val_loss: 0.3696 - val_acc: 0.8651
Epoch 21/40
15000/15000 [==============================] - 1s 72us/step - loss: 0.2789 - acc: 0.9247 - val_loss: 0.3583 - val_acc: 0.8694
Epoch 22/40
15000/15000 [==============================] - 1s 72us/step - loss: 0.2629 - acc: 0.9287 - val_loss: 0.3480 - val_acc: 0.8725
Epoch 23/40
15000/15000 [==============================] - 1s 71us/step - loss: 0.2483 - acc: 0.9337 - val_loss: 0.3391 - val_acc: 0.8747
Epoch 24/40
15000/15000 [==============================] - 1s 71us/step - loss: 0.2343 - acc: 0.9383 - val_loss: 0.3313 - val_acc: 0.8762
Epoch 25/40
15000/15000 [==============================] - 1s 71us/step - loss: 0.2214 - acc: 0.9427 - val_loss: 0.3243 - val_acc: 0.8789
Epoch 26/40
15000/15000 [==============================] - 1s 71us/step - loss: 0.2095 - acc: 0.9447 - val_loss: 0.3181 - val_acc: 0.8784
Epoch 27/40
15000/15000 [==============================] - 1s 70us/step - loss: 0.1987 - acc: 0.9482 - val_loss: 0.3127 - val_acc: 0.8790
Epoch 28/40
15000/15000 [==============================] - 1s 70us/step - loss: 0.1884 - acc: 0.9525 - val_loss: 0.3077 - val_acc: 0.8801
Epoch 29/40
15000/15000 [==============================] - 1s 72us/step - loss: 0.1787 - acc: 0.9552 - val_loss: 0.3037 - val_acc: 0.8812
Epoch 30/40
15000/15000 [==============================] - 1s 71us/step - loss: 0.1697 - acc: 0.9578 - val_loss: 0.2998 - val_acc: 0.8826
Epoch 31/40
15000/15000 [==============================] - 1s 70us/step - loss: 0.1611 - acc: 0.9609 - val_loss: 0.2962 - val_acc: 0.8832
Epoch 32/40
15000/15000 [==============================] - 1s 70us/step - loss: 0.1533 - acc: 0.9634 - val_loss: 0.2934 - val_acc: 0.8841
Epoch 33/40
15000/15000 [==============================] - 1s 71us/step - loss: 0.1458 - acc: 0.9663 - val_loss: 0.2908 - val_acc: 0.8850
Epoch 34/40
15000/15000 [==============================] - 1s 72us/step - loss: 0.1388 - acc: 0.9685 - val_loss: 0.2883 - val_acc: 0.8854
Epoch 35/40
15000/15000 [==============================] - 1s 70us/step - loss: 0.1321 - acc: 0.9701 - val_loss: 0.2871 - val_acc: 0.8862
Epoch 36/40
15000/15000 [==============================] - 1s 70us/step - loss: 0.1259 - acc: 0.9725 - val_loss: 0.2845 - val_acc: 0.8855
Epoch 37/40
15000/15000 [==============================] - 1s 70us/step - loss: 0.1201 - acc: 0.9743 - val_loss: 0.2843 - val_acc: 0.8859
Epoch 38/40
15000/15000 [==============================] - 1s 70us/step - loss: 0.1146 - acc: 0.9767 - val_loss: 0.2834 - val_acc: 0.8853
Epoch 39/40
15000/15000 [==============================] - 1s 71us/step - loss: 0.1093 - acc: 0.9779 - val_loss: 0.2815 - val_acc: 0.8866
Epoch 40/40
15000/15000 [==============================] - 1s 69us/step - loss: 0.1043 - acc: 0.9801 - val_loss: 0.2804 - val_acc: 0.8878

In [32]:
results = model.evaluate(test_data, test_labels)


25000/25000 [==============================] - 1s 43us/step

In [33]:
print(results)


[0.3344138513469696, 0.87244]
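
`evaluate` returns the loss followed by each compiled metric, so the model reaches about 87% test accuracy. Individual predictions come back as sigmoid probabilities; a sketch of inspecting a few, not part of the original run:

In [0]:
# Probabilities close to 1 indicate a predicted positive review.
probs = model.predict(test_data[:3])   # array of shape (3, 1)
print(probs)
print((probs > 0.5).astype(int))       # hard 0/1 labels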

In [34]:
history_dict = history.history
history_dict.keys()


Out[34]:
dict_keys(['val_loss', 'val_acc', 'loss', 'acc'])

In [43]:
import matplotlib.pyplot as plt

acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']

epochs = range(1, len(acc) + 1)

# "bo" is for "blue dot"
plt.plot(epochs, loss, 'bo', label='Training loss')
# b is for "solid blue line"
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()

plt.show()



In [44]:
plt.clf()   # clear the previous figure
acc_values = history_dict['acc']
val_acc_values = history_dict['val_acc']

plt.plot(epochs, acc_values, 'bo', label='Training acc')
plt.plot(epochs, val_acc_values, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()

plt.show()



In [38]:
history = model.fit(partial_x_train,
                    partial_y_train,
                    epochs=40,
                    batch_size=1024,
                    validation_data=(x_val, y_val),
                    verbose=1)


Train on 15000 samples, validate on 10000 samples
Epoch 1/40
15000/15000 [==============================] - 1s 60us/step - loss: 3.7544e-07 - acc: 1.0000 - val_loss: 1.1327 - val_acc: 0.8629
Epoch 2/40
15000/15000 [==============================] - 1s 62us/step - loss: 3.8036e-07 - acc: 1.0000 - val_loss: 1.1329 - val_acc: 0.8627
Epoch 3/40
15000/15000 [==============================] - 1s 63us/step - loss: 3.7371e-07 - acc: 1.0000 - val_loss: 1.1325 - val_acc: 0.8623
Epoch 4/40
15000/15000 [==============================] - 1s 70us/step - loss: 3.6858e-07 - acc: 1.0000 - val_loss: 1.1326 - val_acc: 0.8623
Epoch 5/40
15000/15000 [==============================] - 1s 68us/step - loss: 3.6539e-07 - acc: 1.0000 - val_loss: 1.1332 - val_acc: 0.8625
Epoch 6/40
15000/15000 [==============================] - 1s 68us/step - loss: 3.6334e-07 - acc: 1.0000 - val_loss: 1.1334 - val_acc: 0.8625
Epoch 7/40
15000/15000 [==============================] - 1s 68us/step - loss: 3.6110e-07 - acc: 1.0000 - val_loss: 1.1336 - val_acc: 0.8626
Epoch 8/40
15000/15000 [==============================] - 1s 67us/step - loss: 3.5959e-07 - acc: 1.0000 - val_loss: 1.1339 - val_acc: 0.8624
Epoch 9/40
15000/15000 [==============================] - 1s 68us/step - loss: 3.5732e-07 - acc: 1.0000 - val_loss: 1.1343 - val_acc: 0.8624
Epoch 10/40
15000/15000 [==============================] - 1s 68us/step - loss: 3.5602e-07 - acc: 1.0000 - val_loss: 1.1346 - val_acc: 0.8624
Epoch 11/40
15000/15000 [==============================] - 1s 68us/step - loss: 3.5433e-07 - acc: 1.0000 - val_loss: 1.1348 - val_acc: 0.8622
Epoch 12/40
15000/15000 [==============================] - 1s 69us/step - loss: 3.5248e-07 - acc: 1.0000 - val_loss: 1.1352 - val_acc: 0.8624
Epoch 13/40
15000/15000 [==============================] - 1s 69us/step - loss: 3.5083e-07 - acc: 1.0000 - val_loss: 1.1355 - val_acc: 0.8624
Epoch 14/40
15000/15000 [==============================] - 1s 68us/step - loss: 3.4880e-07 - acc: 1.0000 - val_loss: 1.1358 - val_acc: 0.8624
Epoch 15/40
15000/15000 [==============================] - 1s 68us/step - loss: 3.4733e-07 - acc: 1.0000 - val_loss: 1.1361 - val_acc: 0.8622
Epoch 16/40
15000/15000 [==============================] - 1s 68us/step - loss: 3.4512e-07 - acc: 1.0000 - val_loss: 1.1367 - val_acc: 0.8623
Epoch 17/40
 8192/15000 [===============>..............] - ETA: 0s - loss: 3.7739e-07 - acc: 1.0000
---------------------------------------------------------------------------
KeyboardInterrupt                         Traceback (most recent call last)
<ipython-input-38-b6957959e5ce> in <module>()
      4                     batch_size=1024,
      5                     validation_data=(x_val, y_val),
----> 6                     verbose=1)

...

KeyboardInterrupt: 
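
This second `fit` call kept training the already-converged model: training loss collapsed toward zero while validation loss sat above 1.13, which is textbook overfitting, and the run was interrupted by hand. One alternative to manual interruption is an `EarlyStopping` callback; a sketch under the same training setup, not executed here:

In [0]:
# Stop automatically once validation loss has not improved for 3 epochs.
# (Newer TF/Keras versions also support restore_best_weights=True.)
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=3)

history = model.fit(partial_x_train, partial_y_train,
                    epochs=40, batch_size=1024,
                    validation_data=(x_val, y_val),
                    callbacks=[early_stop],
                    verbose=1)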
