importing require packages


In [133]:
from __future__ import print_function

import json
import os
import numpy as np
import sys
import h5py

from gensim.models import Word2Vec
from gensim.utils import simple_preprocess
from keras.layers import Embedding
from keras.models import Sequential
from keras.layers import Dense
from keras.layers import Dropout
from keras.layers import LSTM
from keras.preprocessing import sequence
from intersect_embeddings import Embeddings
from keras.callbacks import ModelCheckpoint

import plotly.plotly as py
import plotly.graph_objs as go

from nltk.tokenize import word_tokenize
import random
from itertools import groupby

Instantiate Embeddings


In [7]:
embeddings = Embeddings(300, 4, 1, 4)

Getting data from preprocessing


In [8]:
word2vec_model = embeddings.get_intersected_model()
word2index, index2word = embeddings.get_vocabulary()
word2vec_weights = word2vec_model.wv.syn0
tokenized_indexed_sentences = embeddings.get_indexed_sentences()


Loading Indexed Sentences...

In [9]:
word2index = {word:index+1 for word, index in word2index.items()}
index2word = {index:word for word, index in word2index.items()}

In [10]:
word2index


Out[10]:
{'serves': 1978,
 'unpredictable': 22529,
 'papacy': 5100,
 'the_village': 41151,
 'murad_ii': 17698,
 'early-spring': 35732,
 '1868_to_1876': 44635,
 'anglo-saxon': 5438,
 'virginia_key': 52473,
 'idealogy': 39439,
 'dinophyte': 30121,
 'kick': 9552,
 'operation_tomahawk': 52247,
 'enablements': 24720,
 'rsa': 26941,
 'funneling': 35294,
 'week': 3138,
 'austrian': 2668,
 'matricide': 18887,
 'attraction': 3604,
 'treaties': 1176,
 'paul_sheehy': 30418,
 'occupy': 2352,
 'inn': 5584,
 "sant'agata": 38316,
 'windows_8': 3298,
 'oceania': 50836,
 '8_million': 22514,
 'contoller': 25765,
 'wallis_simpson': 20411,
 'consuls': 13563,
 'pandemic': 18087,
 'hanzhong': 45318,
 'conga': 26900,
 'education': 300,
 'enacting': 14167,
 'fireworks': 11270,
 'hayk': 51361,
 'the_princess_elizabeth': 29527,
 'lakulish': 45066,
 'inside': 1666,
 'the_dornbirner_ach': 17381,
 'indian_ocean': 45330,
 'the_gurjara_pratihara_empire': 19374,
 'vowel': 4211,
 'knell': 49814,
 'the_roman_kingdom': 14644,
 'classis': 32372,
 'transfered': 19551,
 'infallible': 43773,
 'sultana': 18036,
 'insects': 804,
 'q_bridge': 38834,
 'caveats': 41027,
 'variously': 47925,
 'tangs': 36372,
 'relations': 1363,
 'aksumite': 35160,
 'gary_kildall': 27297,
 'moldovan': 36357,
 'brewers': 14032,
 'reims': 13665,
 'self-mortification': 30465,
 'redistributes': 23032,
 'lane_crawford': 19540,
 'aattack': 38253,
 'noise': 5893,
 'late_august': 29922,
 'hermes': 32113,
 'ahmad': 16352,
 'guided': 7686,
 'amersterdam_avenue': 36684,
 'png': 35809,
 'delisted': 14250,
 'imaginging': 32506,
 'afroasiatic': 36504,
 'the_pledge_of_allegiance': 39811,
 'north_africans': 23914,
 'seismologists': 17944,
 'bording': 40627,
 'not': 55,
 '1488': 39888,
 'portland': 41155,
 'the_arc_de_triomphe': 51363,
 'stuka': 47337,
 '2008': 579,
 'city_hall': 17750,
 'perceptible': 43594,
 'yard': 5678,
 '1730': 15498,
 'circumnavigate': 32408,
 'caliphs': 11802,
 'disco_eye-cued': 22828,
 'hmnb_davenport': 13997,
 'merchantmen': 50174,
 'workhorses': 39788,
 'the_high_court': 44008,
 'devensian': 25861,
 'fooled': 33642,
 '114th_street': 13600,
 'lower-case': 16055,
 'ducks': 5948,
 'july_23_1989': 19381,
 'pharaohs': 28504,
 'agains': 42485,
 'ripples': 24604,
 'mundofox': 32838,
 'yeas': 49039,
 'kimberly_amadeo': 28991,
 'classification_of_mammals': 34385,
 'ferguson': 14061,
 'mianyang_city': 13911,
 'specifies': 6112,
 'the_north_carolina_constitution': 26043,
 'the_windows_store': 13294,
 '14th': 8217,
 'the_late_13th': 44359,
 "luke's-roosevelt": 48497,
 'possesses': 24507,
 'the_morean_war': 40910,
 'milder': 45512,
 'ingaas': 47118,
 'abc_films': 20906,
 'port_of_london_authority': 25710,
 'semen': 34521,
 'koine': 28227,
 'yourself': 19894,
 'kineys': 29541,
 'mammals': 2313,
 'the_buddha_of': 35854,
 'bilingual-spanish': 40367,
 'anti-defense': 17655,
 '187th': 31139,
 'entetainment': 47384,
 'bulk': 4860,
 'transistor': 2717,
 'kimat': 28263,
 'harkness': 49991,
 'thomas_blantz': 41853,
 'flower': 4780,
 'flip': 20831,
 'escalation': 34417,
 'new_haven-based': 44639,
 'janet': 23252,
 'hrothgar_a': 42706,
 'the_italian_empire': 23930,
 'manichaean': 36215,
 'sunni_islam': 17121,
 'kaw-goosh-kaw-nick': 47034,
 'commended': 24657,
 'march_23_1833': 27115,
 'volta': 12769,
 'john_rawsl': 37215,
 'accountants': 29260,
 'annulled': 32015,
 'encounters': 5244,
 'encourage': 2439,
 'minibus': 18819,
 'revoled': 33455,
 'wv': 29342,
 'viceregal': 12271,
 'eased': 21118,
 'relatinship': 42629,
 'fables': 45499,
 'zywny': 41519,
 'codeshare': 31162,
 'old_dutch': 13002,
 'delve': 29271,
 'essentialism': 52345,
 'narth': 11027,
 '97_of': 50511,
 'finite': 4570,
 'st_mary': 11456,
 'grooves': 12349,
 'horn_of_africa': 14326,
 'windows_95': 32156,
 'flaw': 7103,
 'misrata': 13591,
 'sen': 41006,
 'conformities': 27128,
 '1995-1996': 44705,
 'irish-american': 40142,
 'sultan_selim': 11075,
 'the_quran': 18795,
 'junctions': 15736,
 'vince_lombardi_trophy': 40628,
 'merged': 2999,
 'nintendo_magazine': 28388,
 'sisvel': 13799,
 'cunha': 19906,
 'pertains': 13961,
 'configured': 13670,
 'hamilton': 19902,
 'medinese': 42847,
 'waht': 13317,
 'the_theory_of_games': 26481,
 'aslatia': 49541,
 'notre': 10438,
 'cardiff_university': 34101,
 'began': 378,
 'vendor': 11638,
 'obvious': 7565,
 'disorers': 40650,
 'pitcher': 19996,
 'thailand': 3389,
 'july_18_2007': 41240,
 'nomine': 28137,
 'grimm': 9042,
 'assignment': 14529,
 'pplace': 39192,
 'countries': 122,
 '1807': 38260,
 'dismissal': 12749,
 'researcher': 5286,
 'generated': 2486,
 'unabsorbed': 31382,
 'grand_master': 51791,
 'codependent': 27206,
 'john_marshall': 38895,
 'purporting': 52612,
 'reprinting': 23771,
 'premier': 2425,
 'galkayo': 48036,
 'color-rendering': 50757,
 'cincinnati': 21208,
 "the_palme_d'or_at": 44394,
 'contribute': 1940,
 'partridge': 50703,
 'subcategory': 32794,
 'vihuelas': 45440,
 'steep': 6644,
 'balanced': 10764,
 'lond': 24809,
 'replaced': 500,
 'ali_ibn_abi_talib': 39581,
 'dissipated': 27439,
 'the_previous_year_in_2011-12': 28990,
 'duped': 47543,
 'gram': 10310,
 'the_medical_institute': 39805,
 'entire': 1678,
 'farmer': 20528,
 "'giant": 49058,
 'analogously': 29753,
 'deja_vu': 14600,
 'administration': 1636,
 'slums': 11811,
 'main_building': 30548,
 "ku's_school_of_business": 17697,
 'atari': 12862,
 'phonetical': 49750,
 'chernaya': 25483,
 'inaccessible': 26912,
 'richard_sheale': 37979,
 'new_yorkese': 43678,
 'stirling': 24455,
 'equatorial_african': 29628,
 'coastline': 4516,
 'electrical_world': 30420,
 'victor_hugo': 22635,
 'southwest_ledge_lighthouse': 27192,
 'hybrid': 9068,
 'margaret_sullavan': 34522,
 'property': 812,
 'intervals': 11087,
 'marxists': 20279,
 'coldplay': 7365,
 '526': 26760,
 'pro-life': 20559,
 'upper-level': 36021,
 'falco': 27988,
 'the_end_of_glory': 38993,
 'full': 896,
 'valery_giscard': 43312,
 'enumerators': 24635,
 'utilized': 3215,
 'patent-free': 36628,
 '1997_to_2004': 30300,
 'the_area_provostdean': 45596,
 '2010-2011': 50919,
 'defent': 28260,
 'violently': 28218,
 'inhibition': 17093,
 'cluny': 14158,
 'ghandi': 50105,
 'liklihood': 48956,
 'disposable': 21260,
 'emergingdeveloping': 30115,
 'solemn': 37478,
 'contorl': 44340,
 'impeachment': 14331,
 'endeavor': 11828,
 'routed': 13650,
 'isaac_asimov': 51675,
 'simulation': 16558,
 'baymax': 26393,
 'total_war_game': 26514,
 'john_collier': 31597,
 'mercenary': 10346,
 'rbi': 19124,
 'colonizes': 51418,
 'conservative_presbyterian_church': 40042,
 'reproducing': 23445,
 'the_nhk_color': 34729,
 'unwilling': 41700,
 'the_majuro_declaration': 52222,
 'att_broadband': 21825,
 'st_pancras_international': 52671,
 'west_bromwich_albion': 34285,
 'freeway': 6869,
 'waterwheel-powered': 30963,
 'kony': 42906,
 'normality': 47197,
 'john_wesley': 12588,
 'celebrity': 5952,
 'blockading': 34235,
 'the_fars_province': 18647,
 'salah_ahmed': 46836,
 'ballet': 7028,
 'hyperinflation': 24980,
 'cones': 21350,
 'thrive': 4346,
 'millennia': 16836,
 'superhero': 10916,
 'the_eastern_civilization': 49427,
 'latino_americans': 50983,
 'ps+': 42041,
 'herman_goldstone': 27691,
 'john_harris': 33286,
 '90_minutes': 30819,
 'erectus': 40114,
 'societal': 6074,
 'davos': 50215,
 'lodges': 3170,
 'weather': 1235,
 'daylighting': 27472,
 'king_louis_xv': 51271,
 'oracle-bone': 26167,
 'pc_magazine': 24624,
 'abducted': 45287,
 'sundried': 36436,
 'windows_rt': 11844,
 'bone_society': 42389,
 'contradictory': 8135,
 'paul_vi': 1052,
 'adolpt': 26059,
 '6000_years_ago': 47876,
 'sunup': 35472,
 'conservators': 38423,
 'pigeons': 9039,
 'tom_hiddleston': 26069,
 'hinderance': 26294,
 'self-select': 30702,
 'nbc_blue': 30144,
 '1959_to_1973': 41256,
 'phychology': 50760,
 'he': 107,
 'wenchuan': 11404,
 'reformers': 6640,
 'retrospectives': 29981,
 'carotenoids': 12334,
 'east_5th_street': 38134,
 'inexperienced': 30274,
 'manifest': 15444,
 'mediatisation': 22307,
 'non-dipolar': 24556,
 'encompassed': 10592,
 'supercontinent': 8767,
 'accordance': 10627,
 'overturns': 28050,
 'destroy': 2403,
 'mainau': 36843,
 "grammy's_awards": 27991,
 'whigs': 5348,
 'the_world_championships': 50694,
 'the_uss_bogue': 41257,
 'cayuga': 36417,
 'genoa': 30976,
 'gambetta': 31038,
 '15th_century': 28840,
 'cream': 10188,
 'lorenz': 13752,
 'travaux': 45848,
 'south_tuscon': 23619,
 'ironbottom_sound': 47047,
 'senatorial': 27605,
 'stressed': 8543,
 'occupation': 1332,
 'dcsnet': 27096,
 'main_street': 37021,
 'hudon': 51320,
 'fatty': 6119,
 'liabilities': 34163,
 'san_jose_department': 34751,
 'burrough': 50043,
 'roger_de_tosny': 40262,
 'results': 1379,
 'downing': 49278,
 'north_tower': 42640,
 'texts': 1646,
 "'the_grand_concourse": 23224,
 'nicholas_steno': 27070,
 'out_one': 48327,
 'cpus': 9716,
 'blossomed': 42257,
 'marshals': 39670,
 'prosody': 48981,
 'miramar': 36057,
 'methodology': 6973,
 'cochlea': 32987,
 'the_quebec_junior_football_league': 23400,
 'super': 2500,
 'hurricanes': 7842,
 'ler_of': 44268,
 'north_carolinas_7th': 32215,
 'third-largest': 29420,
 'software_assurance': 41165,
 'between_14th_and_min_18th_century': 42527,
 'the_football_association_premier_league': 30794,
 'observances': 11477,
 'chunks': 34272,
 'wave-affiliated': 50372,
 '1983': 3786,
 'ciliary': 23818,
 'madrasahs': 14909,
 'kosher': 31340,
 'soviet_russian_republic': 25193,
 'mouth': 4561,
 'al-haramain_foundation': 48202,
 'viskuli': 28196,
 'abandoned': 3001,
 'the_constitutional_court': 15145,
 'major': 124,
 'the_turnbull_government': 43638,
 'redefine': 20482,
 'the_armistice_of_mudros': 28497,
 'tongzhi': 13675,
 'grand_central_terminal': 39328,
 'compact': 4503,
 'conclave': 16980,
 'moer': 45014,
 'january_1844': 26933,
 'latimer': 14026,
 'admirers': 11415,
 'bactrian': 17315,
 'bolinopsis': 28789,
 'assigns': 19980,
 'incapicitated': 35765,
 'hubble': 24850,
 'lain_gray': 33101,
 'evaluations': 14584,
 'frozen': 9455,
 'template': 10675,
 'eyevision': 24492,
 'afrikaans': 4996,
 'nizam': 20586,
 'demolishing': 28666,
 'aircrews': 50316,
 'often': 126,
 'wallacea': 44153,
 'at_least_seven_straight_days': 42696,
 'expained': 29152,
 'keynes': 15414,
 'bayezid': 38603,
 'mavericks': 36477,
 'rodeo': 24286,
 'stuco': 19741,
 'precautions': 20745,
 'refromed': 37046,
 'werner_heisenberg': 34165,
 'raffles_millennium_international': 39510,
 'french_army_of_chalons': 28555,
 'irreversible': 22049,
 'scandale': 47096,
 '0': 9670,
 'china': 222,
 'starwave_mobile': 48903,
 'gamesradar': 51481,
 'the_french_emperor_napoleon': 25561,
 'rabbinical': 36389,
 'westquay': 47350,
 'aec': 14260,
 'finance': 3761,
 'cajmere': 29443,
 'detroit_center': 27811,
 'soundtrack': 3918,
 'underprivileged': 25040,
 'modern_greek': 46449,
 'run-off': 45158,
 'queen_of_popular_music': 44088,
 'write-up': 39818,
 'crowded': 25485,
 'huguenot-descended': 25637,
 'wembley': 11490,
 'substrate': 27503,
 'cushitic': 37406,
 'symbollically': 47756,
 'the_dartford_sister': 42986,
 'sensible': 28694,
 'june_2010': 11536,
 'bernini': 40988,
 'steel-wire': 45475,
 'beneventum': 28801,
 'applicability': 27346,
 'high_school': 24557,
 'absorb': 3178,
 'smtp': 39880,
 'alzheimers': 28257,
 'aluminum': 5858,
 'ecologists': 17182,
 'rest-mass': 44614,
 "the_hundred_years'_war": 16739,
 'high-altitude': 26521,
 'in-flight': 25032,
 'stems': 8749,
 'terminology': 12160,
 'beomce': 45721,
 'stereotype': 23395,
 'the_library_of_congress': 18015,
 'value': 638,
 'poem': 3274,
 'the_champalimaud_foundation': 22052,
 'queen_of_england': 22205,
 'the_methodist_church': 21025,
 'porpular': 49663,
 'oriental': 13868,
 'fallibilism': 34078,
 'theodore_hesburgh_library': 33322,
 'the_late_80s': 13013,
 '1889': 9415,
 'the_sugarland_express': 18254,
 'g3p': 41736,
 'julio_iglesias': 42886,
 'bismark': 36621,
 'costs': 2933,
 'june_2012': 12501,
 'constructrion': 34701,
 'stenus': 46313,
 'tamar': 15726,
 'cargo_gateway_of_the_americas': 29055,
 'neurotansmitter': 39846,
 'fences': 17306,
 'dwipa': 26733,
 'non-verbal': 40973,
 '1_of': 47483,
 'the_new_jim_crow_mass_incarceration_in_the_age_of_colorblindness': 52545,
 'alfranj': 51484,
 'discovery': 2408,
 'togas': 24828,
 'the_charleston_police_department': 14479,
 'sexes': 25573,
 'itu-r': 14518,
 'may_2003': 37503,
 'between_1948_and_1958': 37238,
 'nuncio': 26856,
 'inmate': 46714,
 'sweeping': 16746,
 'the_act_of_1946': 37280,
 'rb': 5124,
 'censuses': 11396,
 'recommends': 13027,
 'endeavoured': 34175,
 'distinguishments': 37347,
 '2015-2016': 48111,
 'spiral_scratch': 33934,
 'assume': 3210,
 'khitan_tumens': 29490,
 'amity': 9903,
 'father_edward_sorin': 13828,
 'house_of_assembly': 48366,
 'the_dutch_navy': 28886,
 'elba': 43220,
 'free-to-air': 11607,
 'milonga': 52168,
 'bundled': 7484,
 'testing': 1080,
 'the_game_of_asscociation_football': 45765,
 'the_state_summer_residence': 24078,
 'popularized': 6063,
 'rillito_river_park': 24926,
 'milton_eisenhower': 41135,
 'experiences': 3453,
 'the_next': 20383,
 'cubbie-bear': 36105,
 'pelosi': 30479,
 'virginia_biotechnology_research': 44667,
 'conscious': 12453,
 'cultivated': 6076,
 'the_glastonbury_music': 40529,
 'useage': 23867,
 'room': 2108,
 'the_high_renaissance': 38447,
 'subsidizing': 45995,
 '68mcu': 36957,
 'george_humphrey': 51139,
 'bees': 8775,
 'writ': 35396,
 'pleasant': 50595,
 'st_helens': 36426,
 'meadwestvaco': 47288,
 'yehuda_hachasid': 39493,
 'virtue': 10550,
 'mixed_race': 28707,
 'lawrence': 7976,
 'opisthokonts': 17561,
 'inter-chip': 15871,
 'too-fast': 46857,
 'the_raleigh_cougars': 30109,
 'waihopai': 45885,
 'riding': 7951,
 'financial': 497,
 'unrest': 6305,
 'realms': 5804,
 'girls_love_beyoncé': 39498,
 'maintain': 2046,
 "'caged": 41079,
 'horner': 49953,
 'madrasah': 8888,
 'launced': 42788,
 'chalk': 23564,
 'charles_hartshorne': 24169,
 'brits': 17906,
 'treaty_of_alliance': 39692,
 'panoramic': 30958,
 'the_world_war_ii': 19088,
 'david_quammen': 52280,
 'posits': 17215,
 'arctic': 6532,
 'early_settlers': 37083,
 'adinath': 29674,
 'pariah': 20280,
 'harp': 44803,
 'unemployment': 2294,
 'dynagroove': 51997,
 'manx': 15404,
 'embezzle': 50656,
 'the_proterozoic_era': 29868,
 'connecter': 50517,
 'deg': 31974,
 'seminole': 47085,
 'the_wayback_machine': 7932,
 'journeys': 29854,
 'the_center_for_new_religions': 39622,
 'wealthy': 4239,
 'irina_margareta_nistor': 44126,
 'between_the_ice_sheets': 34306,
 'serpent': 37812,
 'royalties': 22301,
 'godfather': 21953,
 'meanings': 8023,
 '1-1': 47562,
 'honour_for_services': 33662,
 'ed_policy': 17930,
 'bbc2': 13658,
 "'the_kidnapping_of_edgardo_mortara": 21124,
 'humble': 20526,
 'pleuobrachia': 25628,
 'ray_holmes': 13605,
 '1203': 17521,
 'militarism': 18501,
 'prop': 22517,
 'the_thanksgiving_day': 47748,
 'vassilis_vryonides': 38307,
 'rescator': 46086,
 'essays': 11077,
 'kms': 46134,
 'resupplying': 49787,
 '6': 4204,
 'the_us': 5523,
 'tetragraph': 39653,
 'maney': 32210,
 'granite_peak': 37255,
 'anti-christian': 41869,
 'obstruction': 10828,
 'equilateral': 35856,
 '1870': 7129,
 'ranunculus': 47806,
 'newsstand': 51109,
 'colonialism': 5537,
 'expels': 50355,
 'the_beauty_of_durres': 27843,
 'funny': 21736,
 'remind': 10455,
 'incurs': 41922,
 'hemagglutinin': 46655,
 'startup': 11021,
 'suddhodana': 32637,
 'take_me_out_to_a_cubs_game': 43519,
 'authenticated': 47728,
 'ectoderm': 21318,
 'mp2': 51699,
 'oscar': 9161,
 'the_dc_national_guard': 25237,
 'occur': 209,
 'trident': 11763,
 'theist': 21539,
 'corrected': 8267,
 'walmart': 44608,
 'singledouble_summer_time': 50871,
 'voice-aspirated': 40077,
 'farenheit': 37123,
 'the_wagner_act': 33788,
 'almost_14_of': 27450,
 'nonprofit': 8929,
 'the_adrienne_arsht_center_for': 19877,
 'absorbs': 8538,
 'the_williams_tower': 52264,
 'hinduism': 5055,
 'diminished': 6988,
 'bomb-shelters': 40903,
 'frigid': 32756,
 'cremyll-stonehouse': 47273,
 '406_of': 38859,
 'alone': 3483,
 'motors': 1953,
 'fuly': 49326,
 'locks': 21883,
 'floing': 35685,
 '90th': 16049,
 'bielany_forest': 37904,
 'sheffield': 50896,
 'hurling': 49810,
 'vincent': 42724,
 'proportions': 15631,
 'raf_brize': 33159,
 'principalities': 7287,
 'gerald_freedman': 47321,
 'the_european_age_of': 29611,
 'the_kashmir_shaivism': 36687,
 'eager': 37540,
 'violinists': 36569,
 'instructive': 44264,
 'employ': 2847,
 '525-line': 45255,
 'manor': 15269,
 'silver_samurai': 43034,
 'tesserae': 9192,
 'gilded': 23668,
 'university_town': 20865,
 'affective-motivational': 25569,
 'specify': 5068,
 'march_of_1953': 30880,
 'vital': 4957,
 'mexican-american': 12870,
 'the_google_book_search': 46825,
 'dbase': 11294,
 'jesus_a': 31673,
 'the_forney_library': 27161,
 'zone': 1967,
 'imperialists': 36970,
 'fasted': 38271,
 'meatpacking_district': 18681,
 '11-plus': 49874,
 'vasaris': 31609,
 'sinagapore': 31503,
 'talbot_hughes': 26618,
 'frontal': 31452,
 'moths': 10078,
 'un-elected': 27392,
 'mayow': 22122,
 'voyage': 5421,
 'lawmaking': 11268,
 'career': 1442,
 'fitna': 51469,
 'overdependence': 35629,
 'objected': 8142,
 'recall': 6182,
 'east_2nd_street': 44696,
 'paleogeographic': 33185,
 'ostsiedlung': 45573,
 'castel_gofolfo': 41950,
 'southwest_asia': 47600,
 'social_policy': 32681,
 'ayyubid': 35787,
 'm1a2_abrams': 44566,
 'swakopmund': 24950,
 'enobled': 25440,
 'the_national_presbyterian_church': 50083,
 'wall-paintings': 33245,
 'insulating': 32632,
 'fatima_house': 24623,
 'hugh_wood': 51122,
 'mode': 2789,
 'mustaqbal_media_corporation': 26701,
 'akbar': 41481,
 'moleben': 18916,
 'its_first_season': 19501,
 'ecology': 8460,
 'wolf_link_amiibo': 37180,
 'fishguard': 42763,
 'interfere': 8273,
 '2008-2009': 19949,
 'the_higher_education_commission': 42050,
 'chromatophores': 51875,
 'ends': 4926,
 'mosaics': 1488,
 'lindzen': 21726,
 'byng': 41287,
 'selena': 35733,
 'between_1973_and_2015': 28914,
 'via': 1805,
 'equinut': 19121,
 'galloway': 18960,
 'exerted': 23128,
 'mandatory_palestine': 35977,
 'morelos': 44482,
 'raided': 7847,
 'deadweight': 41776,
 'the_arabian_peninsula': 13630,
 'karl_von_löesch': 25501,
 'highschools': 41224,
 'tuileries_garden': 51115,
 'overlord': 24832,
 'noticeable': 12908,
 'interdenominational': 42909,
 'opera_america': 46504,
 'laserdiscs': 4020,
 'kreep': 44530,
 'the_calendar_act': 31995,
 'als': 23229,
 'the_evanston_campus_framework_plan': 30389,
 'contributor': 7721,
 'diego_maradona': 19250,
 'accumulating': 14607,
 'ironing': 34053,
 'cd-r': 38794,
 'waqfs': 37438,
 'espanyol': 6208,
 'stratigraphy': 30604,
 'neo_plasticism': 34083,
 'arthurian': 40455,
 'many_years_older': 33532,
 'postmodernism': 31257,
 'fibre': 22571,
 'm5': 51440,
 'mengjiang': 44333,
 'the_sedition_act': 29610,
 'stoning': 28386,
 'slate': 9268,
 'september_2006': 18770,
 'birding': 25192,
 'rewe-zentrale_ag': 34595,
 'emulators': 15407,
 'jessica_sanchez': 14638,
 'republic_day': 17298,
 'sanitarium': 15180,
 'hamaynkner': 33905,
 'parchments': 25320,
 'the_detroit_medical_center': 25295,
 'brigadier': 25991,
 'paula_abdul': 10776,
 'pre-christian': 38804,
 'relaunch': 23550,
 'said': 241,
 'resume': 5739,
 'world_war_i': 3007,
 'mediates': 48141,
 'israelites': 15868,
 'chhauni_silkhana': 36121,
 'the_kings_court': 27653,
 'documentary': 4053,
 'lexical': 18821,
 'origional': 50143,
 'adaptively': 23869,
 'guinean': 50622,
 'legumes': 15917,
 'london-based': 22642,
 'the_indian_rebellion': 7477,
 'simplified': 10818,
 'palaeopolis': 46446,
 'dragon': 14312,
 'beer-like': 24999,
 'gou': 14333,
 'titles': 1826,
 'pan-slavic': 42858,
 'regis': 50671,
 'the_city_government': 35866,
 'driven': 5248,
 'ratification': 8933,
 'museums': 4267,
 "'tamari_ostia": 52256,
 'maradona': 24694,
 "'instrumentalism": 31846,
 'mdna': 10624,
 'asympotic': 39004,
 'paulinella': 44436,
 'the_federalist_papers': 47506,
 'retests': 44492,
 'loyalties': 17714,
 'scientism': 20298,
 'archived': 19105,
 'monatomic': 40002,
 'the_day_of_the_175th_anniversary': 37326,
 'four_months': 16646,
 '500-1400': 29400,
 'two-phased': 36317,
 'proficient': 12470,
 'descended': 2752,
 'governatorate': 33821,
 'fim': 49548,
 'watermill': 25353,
 'aboral': 14792,
 'no_boundaries': 46312,
 'mostly-unplanned': 41921,
 'historicism': 24106,
 'frederick_zugibe': 38105,
 'environmentin': 32150,
 'nazis': 5490,
 'transactions': 9545,
 'heritage': 2633,
 'odc': 52124,
 'vaccum': 40839,
 'suez_canal': 8998,
 'beara': 36751,
 'one-charger-fits-all': 41314,
 'cary': 24596,
 'demonized': 34864,
 'ifpi': 46743,
 'liveable': 9432,
 'renaissance_humanism': 37697,
 'robert_j_shiller': 32423,
 'the_greatest_hits_compilation': 32065,
 'foreshore': 22468,
 'bronx': 888,
 'donald_davies_develop': 27535,
 'doson': 18578,
 'nizari': 49061,
 'radioactivity': 38173,
 'emerge': 3061,
 'james_lovell': 28070,
 'boer': 34859,
 'object-relational': 23251,
 'readings': 8649,
 'benigno_andrade': 42285,
 'chickamauga': 41341,
 'pla_north_east': 32388,
 '1922_to_1954': 43443,
 'saint_fm': 8831,
 'friars': 5634,
 'floating': 10174,
 'hamlet': 20519,
 'creationism': 37087,
 'climbed': 11598,
 'sh': 20809,
 'four-note': 15857,
 'roland_faber': 43382,
 'magnetoception': 29391,
 'replicate': 11605,
 'usb_implementers_forum': 38385,
 'js_bach': 24889,
 'imperial_college_union': 29795,
 'tranformers': 37120,
 'every_7700_years': 49984,
 'burns': 17504,
 'days': 1748,
 'cittern': 30375,
 'arr': 26682,
 'the_detroit_opera_house': 32341,
 'balta_liman': 18837,
 'black_sabbath': 14742,
 'the_ashkenazi': 48406,
 'aeolic': 40822,
 'somewhat': 6266,
 'peter_jennings': 27918,
 'merriam-webster': 17062,
 'landlords': 12480,
 'eople': 24672,
 'circuit': 1963,
 'imperial_university': 35620,
 'the_month_of_august': 19875,
 'phil_skinner': 16068,
 'scheme': 2462,
 'the_beichuan_fault': 17473,
 'everett-seattle': 42069,
 'multi-party': 19130,
 'the_great_gatsby': 21515,
 "people's_deputies": 39140,
 'intuitive': 31363,
 'spanish_america': 17483,
 'pieces': 2078,
 'baths': 13622,
 'circuitous': 42116,
 'executive_vp_of_label_strategy': 30679,
 'sponsorship': 6227,
 'subdivisions': 26872,
 'proprietors': 49581,
 'postponed': 12958,
 'the_zhejiang_communist_party': 47115,
 'stunt': 14713,
 'subway': 4306,
 'discus': 49044,
 'loops': 23107,
 'localizing': 15729,
 'circassians': 43471,
 '1960s': 13042,
 'the_scientific_revolution': 9284,
 'teresa': 22672,
 'runner-up': 47048,
 'xylem': 25700,
 'germania_inferior': 37730,
 ...}

In [11]:
tokenized_indexed_sentences[0]


Out[11]:
[1, 3, 2206, 9, 388, 498, 93, 108, 5, 0]

In [12]:
tokenized_indexed_sentences = [np.array(sentence) + 1 for sentence in tokenized_indexed_sentences if len(sentence) > 0]

In [13]:
tokenized_indexed_sentences[0]


Out[13]:
array([   2,    4, 2207,   10,  389,  499,   94,  109,    6,    1])

In [14]:
new_weights = np.zeros((1, word2vec_weights.shape[1]))

In [15]:
new_weights = np.append(new_weights, word2vec_weights, axis=0)

In [16]:
new_weights.shape


Out[16]:
(52731, 300)

In [17]:
new_weights[52730]


Out[17]:
array([ 0.35742188,  0.03369141, -0.03881836,  0.07666016, -0.06079102,
        0.6328125 ,  0.05615234,  0.04345703,  0.00265503, -0.21582031,
        0.40234375, -0.0559082 , -0.15820312,  0.21289062,  0.28710938,
        0.54296875, -0.13085938,  0.14746094,  0.06738281, -0.171875  ,
        0.07373047, -0.0006485 , -0.10986328, -0.13476562,  0.06152344,
       -0.03833008, -0.07519531, -0.00221252,  0.09179688, -0.37890625,
       -0.31054688, -0.07666016, -0.484375  , -0.0546875 , -0.13183594,
       -0.33203125,  0.20996094,  0.25      ,  0.0534668 ,  0.08496094,
       -0.1875    ,  0.09960938,  0.24902344, -0.07714844, -0.01123047,
       -0.06787109,  0.21191406, -0.11865234, -0.01660156,  0.22265625,
       -0.37695312,  0.36914062, -0.51171875,  0.06640625, -0.19726562,
       -0.01818848,  0.0612793 , -0.21582031,  0.13574219, -0.08154297,
        0.18652344,  0.3203125 ,  0.26367188,  0.24609375,  0.01208496,
        0.04931641,  0.18652344,  0.29296875,  0.21289062,  0.06884766,
        0.13476562, -0.17480469, -0.02246094,  0.25195312,  0.02380371,
       -0.00354004,  0.09228516,  0.1953125 , -0.07763672, -0.13867188,
        0.05175781,  0.17578125, -0.02124023, -0.38476562, -0.16992188,
       -0.12597656, -0.11376953,  0.13671875, -0.06835938, -0.00921631,
        0.04394531, -0.27148438, -0.45703125, -0.08837891,  0.04321289,
        0.15332031,  0.1796875 , -0.02099609,  0.20507812, -0.05688477,
        0.10839844, -0.12011719,  0.203125  , -0.31054688,  0.28125   ,
       -0.23828125, -0.44921875, -0.02966309,  0.19628906, -0.36523438,
        0.05761719,  0.15527344,  0.10742188,  0.23242188, -0.13085938,
        0.14257812, -0.32617188,  0.09423828,  0.32421875,  0.00183868,
        0.20703125,  0.203125  ,  0.07617188, -0.17285156, -0.10449219,
        0.15136719, -0.06542969,  0.11083984,  0.04956055,  0.25585938,
       -0.10302734, -0.08886719, -0.23925781,  0.11328125,  0.14941406,
       -0.03833008,  0.12402344,  0.13085938,  0.19433594,  0.03613281,
        0.0072937 , -0.0612793 , -0.20703125,  0.38867188,  0.12988281,
        0.23925781, -0.36523438,  0.265625  , -0.50390625,  0.21679688,
        0.26367188,  0.05566406, -0.26757812,  0.23632812, -0.171875  ,
       -0.11181641,  0.16796875, -0.23925781, -0.02380371,  0.10400391,
       -0.20117188,  0.12402344, -0.10546875,  0.02233887,  0.15234375,
        0.31640625,  0.05322266, -0.20214844,  0.13769531,  0.00442505,
        0.14550781, -0.0703125 ,  0.17382812, -0.18457031, -0.21191406,
        0.07275391, -0.1640625 , -0.01660156, -0.0019989 ,  0.01361084,
        0.04223633, -0.01116943,  0.05786133, -0.13378906, -0.24804688,
       -0.38085938, -0.03100586, -0.10839844,  0.21386719,  0.03686523,
       -0.11279297,  0.34765625,  0.20507812, -0.14941406,  0.05175781,
       -0.06445312, -0.16210938, -0.15917969, -0.12792969, -0.18554688,
       -0.08007812,  0.02526855, -0.17285156,  0.14550781,  0.1015625 ,
        0.04321289, -0.19433594,  0.10107422,  0.11279297, -0.14453125,
       -0.53125   , -0.21679688, -0.02258301, -0.09277344, -0.02612305,
        0.14453125,  0.10839844, -0.23242188, -0.34765625, -0.09423828,
        0.19726562,  0.06054688,  0.11230469, -0.16894531, -0.21972656,
        0.12011719,  0.18164062, -0.0177002 , -0.18359375, -0.22070312,
        0.09863281, -0.06787109, -0.36132812,  0.09423828, -0.00601196,
        0.38085938,  0.2890625 , -0.02661133, -0.14550781,  0.02832031,
        0.3984375 , -0.1328125 ,  0.09960938,  0.04882812,  0.03466797,
       -0.05078125,  0.25390625,  0.09716797,  0.06152344, -0.16210938,
       -0.11523438,  0.07861328,  0.47851562,  0.546875  ,  0.05859375,
       -0.15039062, -0.08398438, -0.22070312,  0.25195312,  0.14355469,
       -0.18457031, -0.22851562, -0.11767578,  0.21386719, -0.14746094,
        0.02307129, -0.20019531, -0.09423828,  0.1015625 , -0.04589844,
       -0.09472656,  0.03320312, -0.06835938,  0.05566406,  0.30273438,
       -0.0456543 , -0.02111816,  0.18847656,  0.33007812,  0.3984375 ,
        0.12695312, -0.03173828,  0.35742188, -0.12792969,  0.28320312,
       -0.17773438, -0.29101562, -0.10839844, -0.13183594,  0.15527344,
        0.20800781,  0.2734375 , -0.14355469, -0.11865234, -0.16699219,
       -0.04101562, -0.03564453,  0.00174713, -0.0859375 , -0.17773438])

generating training data


In [18]:
window_size = 5
vocab_size = len(word2index)
print(vocab_size)


52730

In [19]:
maxlen = max([len(sentence) for sentence in tokenized_indexed_sentences])

In [20]:
tokenized_indexed_sentences = sequence.pad_sequences(tokenized_indexed_sentences)

In [24]:
len(tokenized_indexed_sentences[0])


Out[24]:
42

In [25]:
seq_in = []
seq_out = []
# generating dataset
tokenized_indexed_sentences = [sentence for sentence in tokenized_indexed_sentences if len(sentence) > 0]
for sentence in tokenized_indexed_sentences:
    x = sentence
    y = np.append(sentence[1:], np.array(sentence[len(sentence)-1]))
    seq_in.append(x)
    seq_out.append([new_weights[index] for index in y])

# converting seq_in and seq_out into numpy array
seq_in = np.array(seq_in)
seq_out = np.array(seq_out)
n_samples = len(seq_in)
print ("Number of samples : ", n_samples)


Number of samples :  97974

Defining model


In [26]:
# Changes to the model to be done here
model = Sequential()
model.add(Embedding(input_dim=new_weights.shape[0], output_dim=new_weights.shape[1], weights=[new_weights], mask_zero=True))
model.add(LSTM(1024, return_sequences=True))
model.add(LSTM(1024, return_sequences=True))
model.add(LSTM(300, return_sequences=True))
model.load_weights("../weights/lstm-3-1024-1024-batchsize-512-epochs-30-Sequence/weights.29.hdf5")
model.compile(loss='cosine_proximity', optimizer='adam',metrics=['accuracy'])
model.summary()


_________________________________________________________________
Layer (type)                 Output Shape              Param #   
=================================================================
embedding_1 (Embedding)      (None, None, 300)         15819300  
_________________________________________________________________
lstm_1 (LSTM)                (None, None, 1024)        5427200   
_________________________________________________________________
lstm_2 (LSTM)                (None, None, 1024)        8392704   
_________________________________________________________________
lstm_3 (LSTM)                (None, None, 300)         1590000   
=================================================================
Total params: 31,229,204
Trainable params: 31,229,204
Non-trainable params: 0
_________________________________________________________________

In [27]:
model_weights_path = "../weights/lstm-3-1024-1024-batchsize-512-epochs-30-Sequence"
if not os.path.exists(model_weights_path):
    os.makedirs(model_weights_path)
checkpoint_path = model_weights_path + '/weights.{epoch:02d}.hdf5'
checkpoint = ModelCheckpoint(filepath=checkpoint_path, verbose=1, save_best_only=False, mode='max')

Train Model


In [28]:
# model.fit(seq_in, seq_out, epochs=30, verbose=1, batch_size=512, callbacks=[checkpoint])

In [95]:
accuracy = model.evaluate(seq_in, seq_out, verbose=0)

model predict


In [ ]:


In [96]:
accuracy


Out[96]:
[-0.00093102607380229658, 0.12032599566464017]

In [ ]:


In [99]:
start = 0
sentence_test = "how is the "
indexed_sentences = embeddings.get_indexed_query(sentence_test)
print("indexed_sentences ",indexed_sentences)
sent = np.array(indexed_sentences) + 1
#pattern = list(seq_in[start])
pattern = list(sent)
print("\"",' '.join(index2word[index] for index in pattern))
for i in range(4):
    prediction = model.predict(np.array([pattern]))
    pred_word = word2vec_model.similar_by_vector(prediction[0][prediction.shape[1] - 1])[0][0]
    sys.stdout.write(pred_word+" ")
    pattern.append(word2index[pred_word])
    pattern = pattern[:len(pattern)]


indexed_sentences  [1, 11, 8, 2]
" squadstart how is the
consociational used for the 

In [ ]:
================================
# 1word
which nfl team represented the afc
what day the game

In [46]:
[[ 0.18412007  0.04542543  0.21795343 ...,  0.05103429  0.10333754
  -0.24055868]
 [ 0.06989234  0.02780185  0.18792398 ..., -0.0966404   0.1660459
   0.02869556]
 [-0.14092228 -0.10479636 -0.00315017 ..., -0.04664698 -0.0193012
  -0.18405525]
 ..., 
 [ 0.03898042  0.03138988  0.01595723 ..., -0.04284016  0.00147492 -0.        ]
 [-0.00096082 -0.00380996  0.01627631 ..., -0.01853406  0.01832031
  -0.00879718]
 [ 0.02848312  0.0082074   0.02930731 ..., -0.02281111 -0.00461099
  -0.00617669]]
chedi [[ 0.18412007  0.04542543  0.21795343 ...,  0.05103429  0.10333754
  -0.24055868]
 [ 0.06989234  0.02780185  0.18792398 ..., -0.0966404   0.1660459
   0.02869556]
 [-0.14092228 -0.10479636 -0.00315017 ..., -0.04664698 -0.0193012
  -0.18405525]
 ..., 
 [-0.00096082 -0.00380996  0.01627631 ..., -0.01853406  0.01832031
  -0.00879718]
 [ 0.02848312  0.0082074   0.02930731 ..., -0.02281111 -0.00461099
  -0.00617669]
 [ 0.00630879 -0.00051082  0.00335852 ..., -0.00979137  0.01898552 -0.        ]]
palace [[ 0.18412007  0.04542543  0.21795343 ...,  0.05103429  0.10333754
  -0.24055868]
 [ 0.06989234  0.02780185  0.18792398 ..., -0.0966404   0.1660459
   0.02869556]
 [-0.14092228 -0.10479636 -0.00315017 ..., -0.04664698 -0.0193012
  -0.18405525]
 ..., 
 [ 0.02848312  0.0082074   0.02930731 ..., -0.02281111 -0.00461099
  -0.00617669]
 [ 0.00630879 -0.00051082  0.00335852 ..., -0.00979137  0.01898552 -0.        ]
 [-0.01971216 -0.00750435  0.00110365 ..., -0.00632941 -0.00917979
  -0.03102284]]
as [[ 0.18412007  0.04542543  0.21795343 ...,  0.05103429  0.10333754
  -0.24055868]
 [ 0.06989234  0.02780185  0.18792398 ..., -0.0966404   0.1660459
   0.02869556]
 [-0.14092228 -0.10479636 -0.00315017 ..., -0.04664698 -0.0193012
  -0.18405525]
 ..., 
 [ 0.00630879 -0.00051082  0.00335852 ..., -0.00979137  0.01898552 -0.        ]
 [-0.01971216 -0.00750435  0.00110365 ..., -0.00632941 -0.00917979
  -0.03102284]
 [-0.00495647  0.00307422  0.00146239 ..., -0.00513899  0.01176982
  -0.0073432 ]]
polynesian [[ 0.18412007  0.04542543  0.21795343 ...,  0.05103429  0.10333754
  -0.24055868]
 [ 0.06989234  0.02780185  0.18792398 ..., -0.0966404   0.1660459
   0.02869556]
 [-0.14092228 -0.10479636 -0.00315017 ..., -0.04664698 -0.0193012
  -0.18405525]
 ..., 
 [-0.01971216 -0.00750435  0.00110365 ..., -0.00632941 -0.00917979
  -0.03102284]
 [-0.00495647  0.00307422  0.00146239 ..., -0.00513899  0.01176982
  -0.0073432 ]
 [ 0.06350882 -0.06063896  0.04302299 ..., -0.00506015 -0.33358458
  -0.0859012 ]]
interrupting [[ 0.18412007  0.04542543  0.21795343 ...,  0.05103429  0.10333754
  -0.24055868]
 [ 0.06989234  0.02780185  0.18792398 ..., -0.0966404   0.1660459
   0.02869556]
 [-0.14092228 -0.10479636 -0.00315017 ..., -0.04664698 -0.0193012
  -0.18405525]
 ..., 
 [-0.00495647  0.00307422  0.00146239 ..., -0.00513899  0.01176982
  -0.0073432 ]
 [ 0.06350882 -0.06063896  0.04302299 ..., -0.00506015 -0.33358458
  -0.0859012 ]
 [ 0.08807422 -0.09903663  0.09095348 ...,  0.09129792 -0.60016239
  -0.20244008]]
interrupting [[ 0.18412007  0.04542543  0.21795343 ...,  0.05103429  0.10333754
  -0.24055868]
 [ 0.06989234  0.02780185  0.18792398 ..., -0.0966404   0.1660459
   0.02869556]
 [-0.14092228 -0.10479636 -0.00315017 ..., -0.04664698 -0.0193012
  -0.18405525]
 ..., 
 [ 0.06350882 -0.06063896  0.04302299 ..., -0.00506015 -0.33358458
  -0.0859012 ]
 [ 0.08807422 -0.09903663  0.09095348 ...,  0.09129792 -0.60016239
  -0.20244008]
 [ 0.06299915 -0.07275582  0.10243553 ...,  0.13952872 -0.53645998
  -0.18396612]]
interrupting [[ 0.18412007  0.04542543  0.21795343 ...,  0.05103429  0.10333754
  -0.24055868]
 [ 0.06989234  0.02780185  0.18792398 ..., -0.0966404   0.1660459
   0.02869556]
 [-0.14092228 -0.10479636 -0.00315017 ..., -0.04664698 -0.0193012
  -0.18405525]
 ..., 
 [ 0.08807422 -0.09903663  0.09095348 ...,  0.09129792 -0.60016239
  -0.20244008]
 [ 0.06299915 -0.07275582  0.10243553 ...,  0.13952872 -0.53645998
  -0.18396612]
 [ 0.04037373 -0.05459753  0.09878035 ...,  0.14784373 -0.45596743
  -0.15770634]]
interrupting [[ 0.18412007  0.04542543  0.21795343 ...,  0.05103429  0.10333754
  -0.24055868]
 [ 0.06989234  0.02780185  0.18792398 ..., -0.0966404   0.1660459
   0.02869556]
 [-0.14092228 -0.10479636 -0.00315017 ..., -0.04664698 -0.0193012
  -0.18405525]
 ..., 
 [ 0.06299915 -0.07275582  0.10243553 ...,  0.13952872 -0.53645998
  -0.18396612]
 [ 0.04037373 -0.05459753  0.09878035 ...,  0.14784373 -0.45596743
  -0.15770634]
 [-0.0033474  -0.03021199  0.08110242 ...,  0.15047802 -0.30225605
  -0.0890971 ]]
nerves [[ 0.18412007  0.04542543  0.21795343 ...,  0.05103429  0.10333754
  -0.24055868]
 [ 0.06989234  0.02780185  0.18792398 ..., -0.0966404   0.1660459
   0.02869556]
 [-0.14092228 -0.10479636 -0.00315017 ..., -0.04664698 -0.0193012
  -0.18405525]
 ..., 
 [ 0.04037373 -0.05459753  0.09878035 ...,  0.14784373 -0.45596743
  -0.15770634]
 [-0.0033474  -0.03021199  0.08110242 ...,  0.15047802 -0.30225605
  -0.0890971 ]
 [-0.02403891 -0.00142898  0.04814288 ...,  0.11747674 -0.02676389
   0.0047134 ]]


  File "<ipython-input-46-432fa0cc0048>", line 1
    [[ 0.18412007  0.04542543  0.21795343 ...,  0.05103429  0.10333754
                            ^
SyntaxError: invalid syntax

In [ ]:
#e_model = embeddings.get_model()

In [ ]:
#e_model.similar_by_word("profitabl")

Accuracy


In [ ]:
def accuracy():
    count = 0
    correct = 0
    for sub_sample_in, sub_sample_out in zip(seq_in, seq_out):
        ypred = model.predict_on_batch(np.expand_dims(sub_sample_in, axis=0))[0]
        ytrue = sub_sample_out
        pred_word = word2vec_model.similar_by_vector(ypred)[0][0]
        true_word = word2vec_model.similar_by_vector(ytrue)[0][0]
        similarity = word2vec_model.similarity(pred_word, true_word)
        if similarity == 1:
            correct += 1
        count += 1
    print("Accuracy {0}".format(correct/count))

In [ ]:
#seq_out[0]

In [ ]:
# accuracy()

In [ ]:
#model_results = model_fit_summary.history

In [ ]:
#model_results.update(model_fit_summary.params)

In [ ]:
#model_results["train_accuracy"] = accuracy()

In [ ]:
# n = no. of predictions
# accuracy = accuracy(400)
#print(model_results)

In [ ]:
#text_file_path = "../weights/lstm-2-1024-512-batchsize-128-epochs-25/model_results.json"

In [ ]:
#with open(text_file_path, "w") as f:
        #json.dump(model_results, f)

In [108]:
layer1_weights = model.layers[1].get_weights()[0]

In [118]:
layer1_weights.shape


Out[118]:
(300, 4096)

In [ ]:


In [115]:
import seaborn as sns
import matplotlib.pyplot as plt 
%matplotlib inline

In [134]:
trace = go.Heatmap(z=layer1_weights)
data=[trace]
py.iplot(data, filename='labelled-heatmap')


The draw time for this plot will be slow for all clients.
/home/nishant/venvs/autofill/lib/python3.5/site-packages/plotly/api/v1/clientresp.py:40: UserWarning:

Estimated Draw Time Too Long

Out[134]:

In [ ]:
layer1_weights = model.layers[1].get_weights()[1]
trace = go.Heatmap(z=layer1_weights)
data=[trace]
py.iplot(data)

In [ ]:


In [131]:
plt.figure(figsize=(1024, 10))


Out[131]:
<matplotlib.figure.Figure at 0x7f0f3add3ac8>
<matplotlib.figure.Figure at 0x7f0f3add3ac8>

In [132]:
sns.heatmap(layer1_weights[:50, :50], )


Out[132]:
<matplotlib.axes._subplots.AxesSubplot at 0x7f0f5d3ddf98>

In [ ]:


In [ ]: