In [0]:
import tensorflow as tf
from tensorflow import keras
import numpy as np
In [0]:
dataset = keras.datasets.imdb
In [0]:
(train_data,train_labels), (test_data,test_labels) = dataset.load_data()
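Calling load_data() with no arguments keeps the full vocabulary. The num_words argument caps it to the most frequent words, which shrinks the embedding layer later on; a minimal sketch, commented out so it does not change the runs below:
In [0]:
# Optional: keep only the 10,000 most frequent words; rarer words come
# back as a single out-of-vocabulary index instead.
# (train_data, train_labels), (test_data, test_labels) = dataset.load_data(num_words=10000)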
In [15]:
train_data.shape
Out[15]:
(25000,)
In [16]:
test_data.shape
Out[16]:
(25000,)
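The shape is (25000,) rather than 2-D because each element is a plain Python list of word indices, and the reviews have different lengths before padding. A quick way to see this (output not shown here):
In [0]:
# reviews vary in length until pad_sequences is applied below
print(len(train_data[0]), len(train_data[1]))
print(train_data[0][:10])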
In [0]:
word_index = dataset.get_word_index()
In [18]:
word_index
Out[18]:
{'fawn': 34701,
'tsukino': 52006,
'nunnery': 52007,
'sonja': 16816,
'vani': 63951,
'woods': 1408,
'spiders': 16115,
'hanging': 2345,
'woody': 2289,
'trawling': 52008,
"hold's": 52009,
'comically': 11307,
'localized': 40830,
'disobeying': 30568,
"'royale": 52010,
"harpo's": 40831,
'canet': 52011,
'aileen': 19313,
'acurately': 52012,
"diplomat's": 52013,
'rickman': 25242,
'arranged': 6746,
'rumbustious': 52014,
'familiarness': 52015,
"spider'": 52016,
'hahahah': 68804,
"wood'": 52017,
'transvestism': 40833,
"hangin'": 34702,
'bringing': 2338,
'seamier': 40834,
'wooded': 34703,
'bravora': 52018,
'grueling': 16817,
'wooden': 1636,
'wednesday': 16818,
"'prix": 52019,
'altagracia': 34704,
'circuitry': 52020,
'crotch': 11585,
'busybody': 57766,
"tart'n'tangy": 52021,
'burgade': 14129,
'thrace': 52023,
"tom's": 11038,
'snuggles': 52025,
'francesco': 29114,
'complainers': 52027,
'templarios': 52125,
'272': 40835,
'273': 52028,
'zaniacs': 52130,
'275': 34706,
'consenting': 27631,
'snuggled': 40836,
'inanimate': 15492,
'uality': 52030,
'bronte': 11926,
'errors': 4010,
'dialogs': 3230,
"yomada's": 52031,
"madman's": 34707,
'dialoge': 30585,
'usenet': 52033,
'videodrome': 40837,
"kid'": 26338,
'pawed': 52034,
"'girlfriend'": 30569,
"'pleasure": 52035,
"'reloaded'": 52036,
"kazakos'": 40839,
'rocque': 52037,
'mailings': 52038,
'brainwashed': 11927,
'mcanally': 16819,
"tom''": 52039,
'kurupt': 25243,
'affiliated': 21905,
'babaganoosh': 52040,
"noe's": 40840,
'quart': 40841,
'kids': 359,
'uplifting': 5034,
'controversy': 7093,
'kida': 21906,
'kidd': 23379,
"error'": 52041,
'neurologist': 52042,
'spotty': 18510,
'cobblers': 30570,
'projection': 9878,
'fastforwarding': 40842,
'sters': 52043,
"eggar's": 52044,
'etherything': 52045,
'gateshead': 40843,
'airball': 34708,
'unsinkable': 25244,
'stern': 7180,
"cervi's": 52046,
'dnd': 40844,
'dna': 11586,
'insecurity': 20598,
"'reboot'": 52047,
'trelkovsky': 11037,
'jaekel': 52048,
'sidebars': 52049,
"sforza's": 52050,
'distortions': 17633,
'mutinies': 52051,
'sermons': 30602,
'7ft': 40846,
'boobage': 52052,
"o'bannon's": 52053,
'populations': 23380,
'chulak': 52054,
'mesmerize': 27633,
'quinnell': 52055,
'yahoo': 10307,
'meteorologist': 52057,
'beswick': 42577,
'boorman': 15493,
'voicework': 40847,
"ster'": 52058,
'blustering': 22922,
'hj': 52059,
'intake': 27634,
'morally': 5621,
'jumbling': 40849,
'bowersock': 52060,
"'porky's'": 52061,
'gershon': 16821,
'ludicrosity': 40850,
'coprophilia': 52062,
'expressively': 40851,
"india's": 19500,
"post's": 34710,
'wana': 52063,
'wang': 5283,
'wand': 30571,
'wane': 25245,
'edgeways': 52321,
'titanium': 34711,
'pinta': 40852,
'want': 178,
'pinto': 30572,
'whoopdedoodles': 52065,
'tchaikovsky': 21908,
'travel': 2103,
"'victory'": 52066,
'copious': 11928,
'gouge': 22433,
"chapters'": 52067,
'barbra': 6702,
'uselessness': 30573,
"wan'": 52068,
'assimilated': 27635,
'petiot': 16116,
'most\x85and': 52069,
'dinosaurs': 3930,
'wrong': 352,
'seda': 52070,
'stollen': 52071,
'sentencing': 34712,
'ouroboros': 40853,
'assimilates': 40854,
'colorfully': 40855,
'glenne': 27636,
'dongen': 52072,
'subplots': 4760,
'kiloton': 52073,
'chandon': 23381,
"effect'": 34713,
'snugly': 27637,
'kuei': 40856,
'welcomed': 9092,
'dishonor': 30071,
'concurrence': 52075,
'stoicism': 23382,
"guys'": 14896,
"beroemd'": 52077,
'butcher': 6703,
"melfi's": 40857,
'aargh': 30623,
'playhouse': 20599,
'wickedly': 11308,
'fit': 1180,
'labratory': 52078,
'lifeline': 40859,
'screaming': 1927,
'fix': 4287,
'cineliterate': 52079,
'fic': 52080,
'fia': 52081,
'fig': 34714,
'fmvs': 52082,
'fie': 52083,
'reentered': 52084,
'fin': 30574,
'doctresses': 52085,
'fil': 52086,
'zucker': 12606,
'ached': 31931,
'counsil': 52088,
'paterfamilias': 52089,
'songwriter': 13885,
'shivam': 34715,
'hurting': 9654,
'effects': 299,
'slauther': 52090,
"'flame'": 52091,
'sommerset': 52092,
'interwhined': 52093,
'whacking': 27638,
'bartok': 52094,
'barton': 8775,
'frewer': 21909,
"fi'": 52095,
'ingrid': 6192,
'stribor': 30575,
'approporiately': 52096,
'wobblyhand': 52097,
'tantalisingly': 52098,
'ankylosaurus': 52099,
'parasites': 17634,
'childen': 52100,
"jenkins'": 52101,
'metafiction': 52102,
'golem': 17635,
'indiscretion': 40860,
"reeves'": 23383,
"inamorata's": 57781,
'brittannica': 52104,
'adapt': 7916,
"russo's": 30576,
'guitarists': 48246,
'abbott': 10553,
'abbots': 40861,
'lanisha': 17649,
'magickal': 40863,
'mattter': 52105,
"'willy": 52106,
'pumpkins': 34716,
'stuntpeople': 52107,
'estimate': 30577,
'ugghhh': 40864,
'gameplay': 11309,
"wern't": 52108,
"n'sync": 40865,
'sickeningly': 16117,
'chiara': 40866,
'disturbed': 4011,
'portmanteau': 40867,
'ineffectively': 52109,
"duchonvey's": 82143,
"nasty'": 37519,
'purpose': 1285,
'lazers': 52112,
'lightened': 28105,
'kaliganj': 52113,
'popularism': 52114,
"damme's": 18511,
'stylistics': 30578,
'mindgaming': 52115,
'spoilerish': 46449,
"'corny'": 52117,
'boerner': 34718,
'olds': 6792,
'bakelite': 52118,
'renovated': 27639,
'forrester': 27640,
"lumiere's": 52119,
'gaskets': 52024,
'needed': 884,
'smight': 34719,
'master': 1297,
"edie's": 25905,
'seeber': 40868,
'hiya': 52120,
'fuzziness': 52121,
'genesis': 14897,
'rewards': 12607,
'enthrall': 30579,
"'about": 40869,
"recollection's": 52122,
'mutilated': 11039,
'fatherlands': 52123,
"fischer's": 52124,
'positively': 5399,
'270': 34705,
'ahmed': 34720,
'zatoichi': 9836,
'bannister': 13886,
'anniversaries': 52127,
"helm's": 30580,
"'work'": 52128,
'exclaimed': 34721,
"'unfunny'": 52129,
'274': 52029,
'feeling': 544,
"wanda's": 52131,
'dolan': 33266,
'278': 52133,
'peacoat': 52134,
'brawny': 40870,
'mishra': 40871,
'worlders': 40872,
'protags': 52135,
'skullcap': 52136,
'dastagir': 57596,
'affairs': 5622,
'wholesome': 7799,
'hymen': 52137,
'paramedics': 25246,
'unpersons': 52138,
'heavyarms': 52139,
'affaire': 52140,
'coulisses': 52141,
'hymer': 40873,
'kremlin': 52142,
'shipments': 30581,
'pixilated': 52143,
"'00s": 30582,
'diminishing': 18512,
'cinematic': 1357,
'resonates': 14898,
'simplify': 40874,
"nature'": 40875,
'temptresses': 40876,
'reverence': 16822,
'resonated': 19502,
'dailey': 34722,
'2\x85': 52144,
'treize': 27641,
'majo': 52145,
'kiya': 21910,
'woolnough': 52146,
'thanatos': 39797,
'sandoval': 35731,
'dorama': 40879,
"o'shaughnessy": 52147,
'tech': 4988,
'fugitives': 32018,
'teck': 30583,
"'e'": 76125,
'doesn’t': 40881,
'purged': 52149,
'saying': 657,
"martians'": 41095,
'norliss': 23418,
'dickey': 27642,
'dicker': 52152,
"'sependipity": 52153,
'padded': 8422,
'ordell': 57792,
"sturges'": 40882,
'independentcritics': 52154,
'tempted': 5745,
"atkinson's": 34724,
'hounded': 25247,
'apace': 52155,
'clicked': 15494,
"'humor'": 30584,
"martino's": 17177,
"'supporting": 52156,
'warmongering': 52032,
"zemeckis's": 34725,
'lube': 21911,
'shocky': 52157,
'plate': 7476,
'plata': 40883,
'sturgess': 40884,
"nerds'": 40885,
'plato': 20600,
'plath': 34726,
'platt': 40886,
'mcnab': 52159,
'clumsiness': 27643,
'altogether': 3899,
'massacring': 42584,
'bicenntinial': 52160,
'skaal': 40887,
'droning': 14360,
'lds': 8776,
'jaguar': 21912,
"cale's": 34727,
'nicely': 1777,
'mummy': 4588,
"lot's": 18513,
'patch': 10086,
'kerkhof': 50202,
"leader's": 52161,
"'movie": 27644,
'uncomfirmed': 52162,
'heirloom': 40888,
'wrangle': 47360,
'emotion\x85': 52163,
"'stargate'": 52164,
'pinoy': 40889,
'conchatta': 40890,
'broeke': 41128,
'advisedly': 40891,
"barker's": 17636,
'descours': 52166,
'lots': 772,
'lotr': 9259,
'irs': 9879,
'lott': 52167,
'xvi': 40892,
'irk': 34728,
'irl': 52168,
'ira': 6887,
'belzer': 21913,
'irc': 52169,
'ire': 27645,
'requisites': 40893,
'discipline': 7693,
'lyoko': 52961,
'extend': 11310,
'nature': 873,
"'dickie'": 52170,
'optimist': 40894,
'lapping': 30586,
'superficial': 3900,
'vestment': 52171,
'extent': 2823,
'tendons': 52172,
"heller's": 52173,
'quagmires': 52174,
'miyako': 52175,
'moocow': 20601,
"coles'": 52176,
'lookit': 40895,
'ravenously': 52177,
'levitating': 40896,
'perfunctorily': 52178,
'lookin': 30587,
"lot'": 40898,
'lookie': 52179,
'fearlessly': 34870,
'libyan': 52181,
'fondles': 40899,
'gopher': 35714,
'wearying': 40901,
"nz's": 52182,
'minuses': 27646,
'puposelessly': 52183,
'shandling': 52184,
'decapitates': 31268,
'humming': 11929,
"'nother": 40902,
'smackdown': 21914,
'underdone': 30588,
'frf': 40903,
'triviality': 52185,
'fro': 25248,
'bothers': 8777,
"'kensington": 52186,
'much': 73,
'muco': 34730,
'wiseguy': 22615,
"richie's": 27648,
'tonino': 40904,
'unleavened': 52187,
'fry': 11587,
"'tv'": 40905,
'toning': 40906,
'obese': 14361,
'sensationalized': 30589,
'spiv': 40907,
'spit': 6259,
'arkin': 7364,
'charleton': 21915,
'jeon': 16823,
'boardroom': 21916,
'doubts': 4989,
'spin': 3084,
'hepo': 53083,
'wildcat': 27649,
'venoms': 10584,
'misconstrues': 52191,
'mesmerising': 18514,
'misconstrued': 40908,
'rescinds': 52192,
'prostrate': 52193,
'majid': 40909,
'climbed': 16479,
'canoeing': 34731,
'majin': 52195,
'animie': 57804,
'sylke': 40910,
'conditioned': 14899,
'waddell': 40911,
'3\x85': 52196,
'hyperdrive': 41188,
'conditioner': 34732,
'bricklayer': 53153,
'hong': 2576,
'memoriam': 52198,
'inventively': 30592,
"levant's": 25249,
'portobello': 20638,
'remand': 52200,
'mummified': 19504,
'honk': 27650,
'spews': 19505,
'visitations': 40912,
'mummifies': 52201,
'cavanaugh': 25250,
'zeon': 23385,
"jungle's": 40913,
'viertel': 34733,
'frenchmen': 27651,
'torpedoes': 52202,
'schlessinger': 52203,
'torpedoed': 34734,
'blister': 69876,
'cinefest': 52204,
'furlough': 34735,
'mainsequence': 52205,
'mentors': 40914,
'academic': 9094,
'stillness': 20602,
'academia': 40915,
'lonelier': 52206,
'nibby': 52207,
"losers'": 52208,
'cineastes': 40916,
'corporate': 4449,
'massaging': 40917,
'bellow': 30593,
'absurdities': 19506,
'expetations': 53241,
'nyfiken': 40918,
'mehras': 75638,
'lasse': 52209,
'visability': 52210,
'militarily': 33946,
"elder'": 52211,
'gainsbourg': 19023,
'hah': 20603,
'hai': 13420,
'haj': 34736,
'hak': 25251,
'hal': 4311,
'ham': 4892,
'duffer': 53259,
'haa': 52213,
'had': 66,
'advancement': 11930,
'hag': 16825,
"hand'": 25252,
'hay': 13421,
'mcnamara': 20604,
"mozart's": 52214,
'duffel': 30731,
'haq': 30594,
'har': 13887,
'has': 44,
'hat': 2401,
'hav': 40919,
'haw': 30595,
'figtings': 52215,
'elders': 15495,
'underpanted': 52216,
'pninson': 52217,
'unequivocally': 27652,
"barbara's": 23673,
"bello'": 52219,
'indicative': 12997,
'yawnfest': 40920,
'hexploitation': 52220,
"loder's": 52221,
'sleuthing': 27653,
"justin's": 32622,
"'ball": 52222,
"'summer": 52223,
"'demons'": 34935,
"mormon's": 52225,
"laughton's": 34737,
'debell': 52226,
'shipyard': 39724,
'unabashedly': 30597,
'disks': 40401,
'crowd': 2290,
'crowe': 10087,
"vancouver's": 56434,
'mosques': 34738,
'crown': 6627,
'culpas': 52227,
'crows': 27654,
'surrell': 53344,
'flowless': 52229,
'sheirk': 52230,
"'three": 40923,
"peterson'": 52231,
'ooverall': 52232,
'perchance': 40924,
'bottom': 1321,
'chabert': 53363,
'sneha': 52233,
'inhuman': 13888,
'ichii': 52234,
'ursla': 52235,
'completly': 30598,
'moviedom': 40925,
'raddick': 52236,
'brundage': 51995,
'brigades': 40926,
'starring': 1181,
"'goal'": 52237,
'caskets': 52238,
'willcock': 52239,
"threesome's": 52240,
"mosque'": 52241,
"cover's": 52242,
'spaceships': 17637,
'anomalous': 40927,
'ptsd': 27655,
'shirdan': 52243,
'obscenity': 21962,
'lemmings': 30599,
'duccio': 30600,
"levene's": 52244,
"'gorby'": 52245,
"teenager's": 25255,
'marshall': 5340,
'honeymoon': 9095,
'shoots': 3231,
'despised': 12258,
'okabasho': 52246,
'fabric': 8289,
'cannavale': 18515,
'raped': 3537,
"tutt's": 52247,
'grasping': 17638,
'despises': 18516,
"thief's": 40928,
'rapes': 8926,
'raper': 52248,
"eyre'": 27656,
'walchek': 52249,
"elmo's": 23386,
'perfumes': 40929,
'spurting': 21918,
"exposition'\x85": 52250,
'denoting': 52251,
'thesaurus': 34740,
"shoot'": 40930,
'bonejack': 49759,
'simpsonian': 52253,
'hebetude': 30601,
"hallow's": 34741,
'desperation\x85': 52254,
'incinerator': 34742,
'congratulations': 10308,
'humbled': 52255,
"else's": 5924,
'trelkovski': 40845,
"rape'": 52256,
"'chapters'": 59386,
'1600s': 52257,
'martian': 7253,
'nicest': 25256,
'eyred': 52259,
'passenger': 9457,
'disgrace': 6041,
'moderne': 52260,
'barrymore': 5120,
'yankovich': 52261,
'moderns': 40931,
'studliest': 52262,
'bedsheet': 52263,
'decapitation': 14900,
'slurring': 52264,
"'nunsploitation'": 52265,
"'character'": 34743,
'cambodia': 9880,
'rebelious': 52266,
'pasadena': 27657,
'crowne': 40932,
"'bedchamber": 52267,
'conjectural': 52268,
'appologize': 52269,
'halfassing': 52270,
'paycheque': 57816,
'palms': 20606,
"'islands": 52271,
'hawked': 40933,
'palme': 21919,
'conservatively': 40934,
'larp': 64007,
'palma': 5558,
'smelling': 21920,
'aragorn': 12998,
'hawker': 52272,
'hawkes': 52273,
'explosions': 3975,
'loren': 8059,
"pyle's": 52274,
'shootout': 6704,
"mike's": 18517,
"driscoll's": 52275,
'cogsworth': 40935,
"britian's": 52276,
'childs': 34744,
"portrait's": 52277,
'chain': 3626,
'whoever': 2497,
'puttered': 52278,
'childe': 52279,
'maywether': 52280,
'chair': 3036,
"rance's": 52281,
'machu': 34745,
'ballet': 4517,
'grapples': 34746,
'summerize': 76152,
'freelance': 30603,
"andrea's": 52283,
'\x91very': 52284,
'coolidge': 45879,
'mache': 18518,
'balled': 52285,
'grappled': 40937,
'macha': 18519,
'underlining': 21921,
'macho': 5623,
'oversight': 19507,
'machi': 25257,
'verbally': 11311,
'tenacious': 21922,
'windshields': 40938,
'paychecks': 18557,
'jerk': 3396,
"good'": 11931,
'prancer': 34748,
'prances': 21923,
'olympus': 52286,
'lark': 21924,
'embark': 10785,
'gloomy': 7365,
'jehaan': 52287,
'turaqui': 52288,
"child'": 20607,
'locked': 2894,
'pranced': 52289,
'exact': 2588,
'unattuned': 52290,
'minute': 783,
'skewed': 16118,
'hodgins': 40940,
'skewer': 34749,
'think\x85': 52291,
'rosenstein': 38765,
'helmit': 52292,
'wrestlemanias': 34750,
'hindered': 16826,
"martha's": 30604,
'cheree': 52293,
"pluckin'": 52294,
'ogles': 40941,
'heavyweight': 11932,
'aada': 82190,
'chopping': 11312,
'strongboy': 61534,
'hegemonic': 41342,
'adorns': 40942,
'xxth': 41346,
'nobuhiro': 34751,
'capitães': 52298,
'kavogianni': 52299,
'antwerp': 13422,
'celebrated': 6538,
'roarke': 52300,
'baggins': 40943,
'cheeseburgers': 31270,
'matras': 52301,
"nineties'": 52302,
"'craig'": 52303,
'celebrates': 12999,
'unintentionally': 3383,
'drafted': 14362,
'climby': 52304,
'303': 52305,
'oldies': 18520,
'climbs': 9096,
'honour': 9655,
'plucking': 34752,
'305': 30074,
'address': 5514,
'menjou': 40944,
"'freak'": 42592,
'dwindling': 19508,
'benson': 9458,
'white’s': 52307,
'shamelessness': 40945,
'impacted': 21925,
'upatz': 52308,
'cusack': 3840,
"flavia's": 37567,
'effette': 52309,
'influx': 34753,
'boooooooo': 52310,
'dimitrova': 52311,
'houseman': 13423,
'bigas': 25259,
'boylen': 52312,
'phillipenes': 52313,
'fakery': 40946,
"grandpa's": 27658,
'darnell': 27659,
'undergone': 19509,
'handbags': 52315,
'perished': 21926,
'pooped': 37778,
'vigour': 27660,
'opposed': 3627,
'etude': 52316,
"caine's": 11799,
'doozers': 52317,
'photojournals': 34754,
'perishes': 52318,
'constrains': 34755,
'migenes': 40948,
'consoled': 30605,
'alastair': 16827,
'wvs': 52319,
'ooooooh': 52320,
'approving': 34756,
'consoles': 40949,
'disparagement': 52064,
'futureistic': 52322,
'rebounding': 52323,
"'date": 52324,
'gregoire': 52325,
'rutherford': 21927,
'americanised': 34757,
'novikov': 82196,
'following': 1042,
'munroe': 34758,
"morita'": 52326,
'christenssen': 52327,
'oatmeal': 23106,
'fossey': 25260,
'livered': 40950,
'listens': 13000,
"'marci": 76164,
"otis's": 52330,
'thanking': 23387,
'maude': 16019,
'extensions': 34759,
'ameteurish': 52332,
"commender's": 52333,
'agricultural': 27661,
'convincingly': 4518,
'fueled': 17639,
'mahattan': 54014,
"paris's": 40952,
'vulkan': 52336,
'stapes': 52337,
'odysessy': 52338,
'harmon': 12259,
'surfing': 4252,
'halloran': 23494,
'unbelieveably': 49580,
"'offed'": 52339,
'quadrant': 30607,
'inhabiting': 19510,
'nebbish': 34760,
'forebears': 40953,
'skirmish': 34761,
'ocassionally': 52340,
"'resist": 52341,
'impactful': 21928,
'spicier': 52342,
'touristy': 40954,
"'football'": 52343,
'webpage': 40955,
'exurbia': 52345,
'jucier': 52346,
'professors': 14901,
'structuring': 34762,
'jig': 30608,
'overlord': 40956,
'disconnect': 25261,
'sniffle': 82201,
'slimeball': 40957,
'jia': 40958,
'milked': 16828,
'banjoes': 40959,
'jim': 1237,
'workforces': 52348,
'jip': 52349,
'rotweiller': 52350,
'mundaneness': 34763,
"'ninja'": 52351,
"dead'": 11040,
"cipriani's": 40960,
'modestly': 20608,
"professor'": 52352,
'shacked': 40961,
'bashful': 34764,
'sorter': 23388,
'overpowering': 16120,
'workmanlike': 18521,
'henpecked': 27662,
'sorted': 18522,
"jōb's": 52354,
"'always": 52355,
"'baptists": 34765,
'dreamcatchers': 52356,
"'silence'": 52357,
'hickory': 21929,
'fun\x97yet': 52358,
'breakumentary': 52359,
'didn': 15496,
'didi': 52360,
'pealing': 52361,
'dispite': 40962,
"italy's": 25262,
'instability': 21930,
'quarter': 6539,
'quartet': 12608,
'padmé': 52362,
"'bleedmedry": 52363,
'pahalniuk': 52364,
'honduras': 52365,
'bursting': 10786,
"pablo's": 41465,
'irremediably': 52367,
'presages': 40963,
'bowlegged': 57832,
'dalip': 65183,
'entering': 6260,
'newsradio': 76172,
'presaged': 54150,
"giallo's": 27663,
'bouyant': 40964,
'amerterish': 52368,
'rajni': 18523,
'leeves': 30610,
'macauley': 34767,
'seriously': 612,
'sugercoma': 52369,
'grimstead': 52370,
"'fairy'": 52371,
'zenda': 30611,
"'twins'": 52372,
'realisation': 17640,
'highsmith': 27664,
'raunchy': 7817,
'incentives': 40965,
'flatson': 52374,
'snooker': 35097,
'crazies': 16829,
'crazier': 14902,
'grandma': 7094,
'napunsaktha': 52375,
'workmanship': 30612,
'reisner': 52376,
"sanford's": 61306,
'\x91doña': 52377,
'modest': 6108,
"everything's": 19153,
'hamer': 40966,
"couldn't'": 52379,
'quibble': 13001,
'socking': 52380,
'tingler': 21931,
'gutman': 52381,
'lachlan': 40967,
'tableaus': 52382,
'headbanger': 52383,
'spoken': 2847,
'cerebrally': 34768,
"'road": 23490,
'tableaux': 21932,
"proust's": 40968,
'periodical': 40969,
"shoveller's": 52385,
'tamara': 25263,
'affords': 17641,
'concert': 3249,
"yara's": 87955,
'someome': 52386,
'lingering': 8424,
"abraham's": 41511,
'beesley': 34769,
'cherbourg': 34770,
'kagan': 28624,
'snatch': 9097,
"miyazaki's": 9260,
'absorbs': 25264,
"koltai's": 40970,
'tingled': 64027,
'crossroads': 19511,
'rehab': 16121,
'falworth': 52389,
'sequals': 52390,
...}
In [0]:
# A dictionary mapping words to an integer index
word_index = dataset.get_word_index()

# The first indices are reserved for special tokens
word_index = {k: (v + 3) for k, v in word_index.items()}
word_index["<PAD>"] = 0
word_index["<START>"] = 1
word_index["<UNK>"] = 2  # unknown
word_index["<UNUSED>"] = 3

# Invert the mapping so integer indices can be turned back into words
reverse_word_index = {value: key for (key, value) in word_index.items()}

def decode_review(text):
    return ' '.join([reverse_word_index.get(i, '?') for i in text])
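decode_review maps indices back to words; going the other way is just a lookup on word_index. A minimal sketch of a hypothetical encode_review helper (not part of the Keras API), assuming the shifted word_index defined above:
In [0]:
def encode_review(text):
    # hypothetical helper: lower-case, split on whitespace, prepend
    # <START> (1), and map any unknown word to <UNK> (2)
    return [word_index["<START>"]] + [word_index.get(w, word_index["<UNK>"])
                                      for w in text.lower().split()]

encode_review("this film was just brilliant")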
In [0]:
decode_review(train_data[0])
Out[0]:
"<START> this film was just brilliant casting location scenery story direction everyone's really suited the part they played and you could just imagine being there robert redford's is an amazing actor and now the same being director norman's father came from the same scottish island as myself so i loved the fact there was a real connection with this film the witty remarks throughout the film were great it was just brilliant so much that i bought the film as soon as it was released for retail and would recommend it to everyone to watch and the fly fishing was amazing really cried at the end it was so sad and you know what they say if you cry at a film it must have been good and this definitely was also congratulations to the two little boy's that played the part's of norman and paul they were just brilliant children are often left out of the praising list i think because the stars that play them all grown up are such a big profile for the whole film but these children are amazing and should be praised for what they have done don't you think the whole story was so lovely because it was true and was someone's life after all that was shared with us all"
In [0]:
# Pad (or truncate) every review to a fixed length of 256 tokens,
# appending <PAD> (index 0) after the end of shorter reviews
train_data = keras.preprocessing.sequence.pad_sequences(train_data,
                                                        value=word_index["<PAD>"],
                                                        padding='post',
                                                        maxlen=256)
test_data = keras.preprocessing.sequence.pad_sequences(test_data,
                                                       value=word_index["<PAD>"],
                                                       padding='post',
                                                       maxlen=256)
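After padding, both arrays should be rectangular: 25,000 reviews of exactly 256 indices each. A quick sanity check (output not shown here):
In [0]:
# expected: (25000, 256) (25000, 256)
print(train_data.shape, test_data.shape)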
In [39]:
# The embedding's input size must exceed the highest word index in the
# data; the full IMDB index tops out below 90,000 after the +3 offset,
# so 100,000 is a safe upper bound.
vocab_size = 100000

model = keras.Sequential()
model.add(keras.layers.Embedding(vocab_size, 16))           # 16-dim word embeddings
model.add(keras.layers.GlobalAveragePooling1D())            # average over the sequence dimension
model.add(keras.layers.Dense(16, activation=tf.nn.relu))
model.add(keras.layers.Dense(1, activation=tf.nn.sigmoid))  # probability of a positive review
model.summary()
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
embedding_3 (Embedding) (None, None, 16) 1600000
_________________________________________________________________
global_average_pooling1d_3 ( (None, 16) 0
_________________________________________________________________
dense_6 (Dense) (None, 16) 272
_________________________________________________________________
dense_7 (Dense) (None, 1) 17
=================================================================
Total params: 1,600,289
Trainable params: 1,600,289
Non-trainable params: 0
_________________________________________________________________
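The parameter counts in the summary follow directly from the layer sizes: the embedding stores vocab_size × 16 = 100,000 × 16 = 1,600,000 weights, the hidden Dense layer has 16 × 16 weights + 16 biases = 272, and the output layer 16 weights + 1 bias = 17, giving 1,600,289 trainable parameters in total.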
In [0]:
model.compile(optimizer=tf.train.AdamOptimizer(),
loss='binary_crossentropy',
metrics=['accuracy'])
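The tf.train.AdamOptimizer call and the 'acc' metric names in the logs below indicate this notebook ran on TensorFlow 1.x. On TensorFlow 2.x the same compile step would presumably use the Keras optimizer instead:
In [0]:
# TF 2.x equivalent (not executed in this notebook):
# model.compile(optimizer=keras.optimizers.Adam(),
#               loss='binary_crossentropy',
#               metrics=['accuracy'])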
In [0]:
# Hold out the first 10,000 reviews for validation; train on the other 15,000
x_val = train_data[:10000]
partial_x_train = train_data[10000:]

y_val = train_labels[:10000]
partial_y_train = train_labels[10000:]
In [42]:
history = model.fit(partial_x_train,
partial_y_train,
epochs=40,
batch_size=1024,
validation_data=(x_val, y_val),
verbose=1)
Train on 15000 samples, validate on 10000 samples
Epoch 1/40
15000/15000 [==============================] - 1s 93us/step - loss: 0.6924 - acc: 0.5355 - val_loss: 0.6911 - val_acc: 0.6015
Epoch 2/40
15000/15000 [==============================] - 1s 70us/step - loss: 0.6891 - acc: 0.6336 - val_loss: 0.6874 - val_acc: 0.7201
Epoch 3/40
15000/15000 [==============================] - 1s 71us/step - loss: 0.6838 - acc: 0.7521 - val_loss: 0.6818 - val_acc: 0.7514
Epoch 4/40
15000/15000 [==============================] - 1s 71us/step - loss: 0.6760 - acc: 0.7896 - val_loss: 0.6740 - val_acc: 0.7690
Epoch 5/40
15000/15000 [==============================] - 1s 72us/step - loss: 0.6652 - acc: 0.8043 - val_loss: 0.6631 - val_acc: 0.7681
Epoch 6/40
15000/15000 [==============================] - 1s 71us/step - loss: 0.6506 - acc: 0.8088 - val_loss: 0.6491 - val_acc: 0.7821
Epoch 7/40
15000/15000 [==============================] - 1s 71us/step - loss: 0.6321 - acc: 0.8187 - val_loss: 0.6317 - val_acc: 0.7820
Epoch 8/40
15000/15000 [==============================] - 1s 69us/step - loss: 0.6098 - acc: 0.8264 - val_loss: 0.6115 - val_acc: 0.7936
Epoch 9/40
15000/15000 [==============================] - 1s 71us/step - loss: 0.5840 - acc: 0.8362 - val_loss: 0.5890 - val_acc: 0.7991
Epoch 10/40
15000/15000 [==============================] - 1s 70us/step - loss: 0.5559 - acc: 0.8431 - val_loss: 0.5651 - val_acc: 0.8109
Epoch 11/40
15000/15000 [==============================] - 1s 71us/step - loss: 0.5263 - acc: 0.8525 - val_loss: 0.5406 - val_acc: 0.8191
Epoch 12/40
15000/15000 [==============================] - 1s 71us/step - loss: 0.4963 - acc: 0.8631 - val_loss: 0.5160 - val_acc: 0.8222
Epoch 13/40
15000/15000 [==============================] - 1s 71us/step - loss: 0.4661 - acc: 0.8737 - val_loss: 0.4923 - val_acc: 0.8316
Epoch 14/40
15000/15000 [==============================] - 1s 71us/step - loss: 0.4372 - acc: 0.8817 - val_loss: 0.4698 - val_acc: 0.8385
Epoch 15/40
15000/15000 [==============================] - 1s 72us/step - loss: 0.4094 - acc: 0.8905 - val_loss: 0.4488 - val_acc: 0.8456
Epoch 16/40
15000/15000 [==============================] - 1s 73us/step - loss: 0.3832 - acc: 0.8973 - val_loss: 0.4300 - val_acc: 0.8493
Epoch 17/40
15000/15000 [==============================] - 1s 70us/step - loss: 0.3590 - acc: 0.9053 - val_loss: 0.4118 - val_acc: 0.8551
Epoch 18/40
15000/15000 [==============================] - 1s 71us/step - loss: 0.3362 - acc: 0.9120 - val_loss: 0.3960 - val_acc: 0.8589
Epoch 19/40
15000/15000 [==============================] - 1s 70us/step - loss: 0.3155 - acc: 0.9158 - val_loss: 0.3819 - val_acc: 0.8626
Epoch 20/40
15000/15000 [==============================] - 1s 72us/step - loss: 0.2964 - acc: 0.9211 - val_loss: 0.3696 - val_acc: 0.8651
Epoch 21/40
15000/15000 [==============================] - 1s 72us/step - loss: 0.2789 - acc: 0.9247 - val_loss: 0.3583 - val_acc: 0.8694
Epoch 22/40
15000/15000 [==============================] - 1s 72us/step - loss: 0.2629 - acc: 0.9287 - val_loss: 0.3480 - val_acc: 0.8725
Epoch 23/40
15000/15000 [==============================] - 1s 71us/step - loss: 0.2483 - acc: 0.9337 - val_loss: 0.3391 - val_acc: 0.8747
Epoch 24/40
15000/15000 [==============================] - 1s 71us/step - loss: 0.2343 - acc: 0.9383 - val_loss: 0.3313 - val_acc: 0.8762
Epoch 25/40
15000/15000 [==============================] - 1s 71us/step - loss: 0.2214 - acc: 0.9427 - val_loss: 0.3243 - val_acc: 0.8789
Epoch 26/40
15000/15000 [==============================] - 1s 71us/step - loss: 0.2095 - acc: 0.9447 - val_loss: 0.3181 - val_acc: 0.8784
Epoch 27/40
15000/15000 [==============================] - 1s 70us/step - loss: 0.1987 - acc: 0.9482 - val_loss: 0.3127 - val_acc: 0.8790
Epoch 28/40
15000/15000 [==============================] - 1s 70us/step - loss: 0.1884 - acc: 0.9525 - val_loss: 0.3077 - val_acc: 0.8801
Epoch 29/40
15000/15000 [==============================] - 1s 72us/step - loss: 0.1787 - acc: 0.9552 - val_loss: 0.3037 - val_acc: 0.8812
Epoch 30/40
15000/15000 [==============================] - 1s 71us/step - loss: 0.1697 - acc: 0.9578 - val_loss: 0.2998 - val_acc: 0.8826
Epoch 31/40
15000/15000 [==============================] - 1s 70us/step - loss: 0.1611 - acc: 0.9609 - val_loss: 0.2962 - val_acc: 0.8832
Epoch 32/40
15000/15000 [==============================] - 1s 70us/step - loss: 0.1533 - acc: 0.9634 - val_loss: 0.2934 - val_acc: 0.8841
Epoch 33/40
15000/15000 [==============================] - 1s 71us/step - loss: 0.1458 - acc: 0.9663 - val_loss: 0.2908 - val_acc: 0.8850
Epoch 34/40
15000/15000 [==============================] - 1s 72us/step - loss: 0.1388 - acc: 0.9685 - val_loss: 0.2883 - val_acc: 0.8854
Epoch 35/40
15000/15000 [==============================] - 1s 70us/step - loss: 0.1321 - acc: 0.9701 - val_loss: 0.2871 - val_acc: 0.8862
Epoch 36/40
15000/15000 [==============================] - 1s 70us/step - loss: 0.1259 - acc: 0.9725 - val_loss: 0.2845 - val_acc: 0.8855
Epoch 37/40
15000/15000 [==============================] - 1s 70us/step - loss: 0.1201 - acc: 0.9743 - val_loss: 0.2843 - val_acc: 0.8859
Epoch 38/40
15000/15000 [==============================] - 1s 70us/step - loss: 0.1146 - acc: 0.9767 - val_loss: 0.2834 - val_acc: 0.8853
Epoch 39/40
15000/15000 [==============================] - 1s 71us/step - loss: 0.1093 - acc: 0.9779 - val_loss: 0.2815 - val_acc: 0.8866
Epoch 40/40
15000/15000 [==============================] - 1s 69us/step - loss: 0.1043 - acc: 0.9801 - val_loss: 0.2804 - val_acc: 0.8878
In [32]:
results = model.evaluate(test_data, test_labels)
25000/25000 [==============================] - 1s 43us/step
In [33]:
print(results)
[0.3344138513469696, 0.87244]
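Beyond aggregate evaluation, model.predict scores individual reviews, returning the sigmoid output, i.e. the estimated probability that a review is positive. A minimal sketch (output not shown here):
In [0]:
# values near 1.0 mean "positive", near 0.0 mean "negative"
print(model.predict(test_data[:3]))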
In [34]:
history_dict = history.history
history_dict.keys()
Out[34]:
dict_keys(['val_loss', 'val_acc', 'loss', 'acc'])
In [43]:
import matplotlib.pyplot as plt
acc = history.history['acc']
val_acc = history.history['val_acc']
loss = history.history['loss']
val_loss = history.history['val_loss']
epochs = range(1, len(acc) + 1)
# "bo" plots blue dots
plt.plot(epochs, loss, 'bo', label='Training loss')
# "b" plots a solid blue line
plt.plot(epochs, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.legend()
plt.show()
In [44]:
plt.clf()   # clear the loss figure before plotting accuracy
acc_values = history_dict['acc']
val_acc_values = history_dict['val_acc']
plt.plot(epochs, acc_values, 'bo', label='Training acc')
plt.plot(epochs, val_acc_values, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
plt.legend()
plt.show()
In [38]:
history = model.fit(partial_x_train,
partial_y_train,
epochs=40,
batch_size=1024,
validation_data=(x_val, y_val),
verbose=1)
Train on 15000 samples, validate on 10000 samples
Epoch 1/40
15000/15000 [==============================] - 1s 60us/step - loss: 3.7544e-07 - acc: 1.0000 - val_loss: 1.1327 - val_acc: 0.8629
Epoch 2/40
15000/15000 [==============================] - 1s 62us/step - loss: 3.8036e-07 - acc: 1.0000 - val_loss: 1.1329 - val_acc: 0.8627
Epoch 3/40
15000/15000 [==============================] - 1s 63us/step - loss: 3.7371e-07 - acc: 1.0000 - val_loss: 1.1325 - val_acc: 0.8623
Epoch 4/40
15000/15000 [==============================] - 1s 70us/step - loss: 3.6858e-07 - acc: 1.0000 - val_loss: 1.1326 - val_acc: 0.8623
Epoch 5/40
15000/15000 [==============================] - 1s 68us/step - loss: 3.6539e-07 - acc: 1.0000 - val_loss: 1.1332 - val_acc: 0.8625
Epoch 6/40
15000/15000 [==============================] - 1s 68us/step - loss: 3.6334e-07 - acc: 1.0000 - val_loss: 1.1334 - val_acc: 0.8625
Epoch 7/40
15000/15000 [==============================] - 1s 68us/step - loss: 3.6110e-07 - acc: 1.0000 - val_loss: 1.1336 - val_acc: 0.8626
Epoch 8/40
15000/15000 [==============================] - 1s 67us/step - loss: 3.5959e-07 - acc: 1.0000 - val_loss: 1.1339 - val_acc: 0.8624
Epoch 9/40
15000/15000 [==============================] - 1s 68us/step - loss: 3.5732e-07 - acc: 1.0000 - val_loss: 1.1343 - val_acc: 0.8624
Epoch 10/40
15000/15000 [==============================] - 1s 68us/step - loss: 3.5602e-07 - acc: 1.0000 - val_loss: 1.1346 - val_acc: 0.8624
Epoch 11/40
15000/15000 [==============================] - 1s 68us/step - loss: 3.5433e-07 - acc: 1.0000 - val_loss: 1.1348 - val_acc: 0.8622
Epoch 12/40
15000/15000 [==============================] - 1s 69us/step - loss: 3.5248e-07 - acc: 1.0000 - val_loss: 1.1352 - val_acc: 0.8624
Epoch 13/40
15000/15000 [==============================] - 1s 69us/step - loss: 3.5083e-07 - acc: 1.0000 - val_loss: 1.1355 - val_acc: 0.8624
Epoch 14/40
15000/15000 [==============================] - 1s 68us/step - loss: 3.4880e-07 - acc: 1.0000 - val_loss: 1.1358 - val_acc: 0.8624
Epoch 15/40
15000/15000 [==============================] - 1s 68us/step - loss: 3.4733e-07 - acc: 1.0000 - val_loss: 1.1361 - val_acc: 0.8622
Epoch 16/40
15000/15000 [==============================] - 1s 68us/step - loss: 3.4512e-07 - acc: 1.0000 - val_loss: 1.1367 - val_acc: 0.8623
Epoch 17/40
8192/15000 [===============>..............] - ETA: 0s - loss: 3.7739e-07 - acc: 1.0000
---------------------------------------------------------------------------
KeyboardInterrupt Traceback (most recent call last)
<ipython-input-38-b6957959e5ce> in <module>()
4 batch_size=1024,
5 validation_data=(x_val, y_val),
----> 6 verbose=1)
...
KeyboardInterrupt:
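This second run makes the overfitting explicit: training loss is already below 4e-07 with 100% training accuracy, while validation loss sits above 1.1, so further epochs only memorize the training set, and the run was interrupted by hand. A standard guard is Keras's EarlyStopping callback, sketched here with illustrative parameters (restore_best_weights requires a reasonably recent Keras):
In [0]:
# Stop once val_loss has failed to improve for 3 consecutive epochs,
# then roll back to the best weights seen so far.
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss',
                                           patience=3,
                                           restore_best_weights=True)
# history = model.fit(partial_x_train, partial_y_train,
#                     epochs=40, batch_size=1024,
#                     validation_data=(x_val, y_val),
#                     callbacks=[early_stop], verbose=1)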