In [133]:
# Imports 
import pandas as pd
import numpy as np
import tensorflow as tf
from nltk.corpus import stopwords
import re
from collections import Counter
import operator
from tensorflow.python.layers.core import Dense

In [134]:
def read_reviews():
    reviews = pd.read_csv("./Datasets/Reviews/Reviews.csv")
    reviews = reviews.dropna()
    reviews = reviews.drop(["Id","ProductId","UserId","ProfileName","HelpfulnessNumerator","HelpfulnessDenominator","Score","Time"]
                 ,axis=1)
    return reviews

reviews = read_reviews()
reviews.head()


Out[134]:
Summary Text
0 Good Quality Dog Food I have bought several of the Vitality canned d...
1 Not as Advertised Product arrived labeled as Jumbo Salted Peanut...
2 "Delight" says it all This is a confection that has been around a fe...
3 Cough Medicine If you are looking for the secret ingredient i...
4 Great taffy Great taffy at a great price. There was a wid...

In [135]:
reviews[reviews.isnull().any(axis=1)] # Empty result: no rows with missing values remain after dropna()


Out[135]:
Summary Text

In [136]:
# Cleaning and normalizing the text and summaries
# Contraction-to-expansion mapping applied before punctuation is stripped
contractions = { 
"ain't": "am not",
"aren't": "are not",
"can't": "cannot",
"can't've": "cannot have",
"'cause": "because",
"could've": "could have",
"couldn't": "could not",
"couldn't've": "could not have",
"didn't": "did not",
"doesn't": "does not",
"don't": "do not",
"hadn't": "had not",
"hadn't've": "had not have",
"hasn't": "has not",
"haven't": "have not",
"he'd": "he would",
"he'd've": "he would have",
"he'll": "he will",
"he's": "he is",
"how'd": "how did",
"how'll": "how will",
"how's": "how is",
"i'd": "i would",
"i'll": "i will",
"i'm": "i am",
"i've": "i have",
"isn't": "is not",
"it'd": "it would",
"it'll": "it will",
"it's": "it is",
"let's": "let us",
"ma'am": "madam",
"mayn't": "may not",
"might've": "might have",
"mightn't": "might not",
"must've": "must have",
"mustn't": "must not",
"needn't": "need not",
"oughtn't": "ought not",
"shan't": "shall not",
"sha'n't": "shall not",
"she'd": "she would",
"she'll": "she will",
"she's": "she is",
"should've": "should have",
"shouldn't": "should not",
"that'd": "that would",
"that's": "that is",
"there'd": "there had",
"there's": "there is",
"they'd": "they would",
"they'll": "they will",
"they're": "they are",
"they've": "they have",
"wasn't": "was not",
"we'd": "we would",
"we'll": "we will",
"we're": "we are",
"we've": "we have",
"weren't": "were not",
"what'll": "what will",
"what're": "what are",
"what's": "what is",
"what've": "what have",
"where'd": "where did",
"where's": "where is",
"who'll": "who will",
"who's": "who is",
"won't": "will not",
"wouldn't": "would not",
"you'd": "you would",
"you'll": "you will",
"you're": "you are"
}
def normalization(review,remove_stopwords=False):
    text = review.lower()
    clean_text = []
    for word in text.split():
        if word in contractions:
            clean_text.append(contractions[word])
        else:
            clean_text.append(word)
    text = " ".join(clean_text)
    
    # Format words and remove unwanted characters
#     text = re.sub(r'https?:\/\/.*[\r\n]*', '', text, flags=re.MULTILINE)
    text = re.sub(r'https', ' ', text)  # strips only the literal "https" token; the rest of a URL is broken apart by the punctuation removal below
    text = re.sub(r'\<a href', ' ', text)
    text = re.sub(r'&amp;', '', text) 
    text = re.sub(r'[_"\-;%()|+&=*%.,!?:#$@\[\]/]', ' ', text)
    text = re.sub(r'<br', ' ', text)
    text = re.sub(r'/>', ' ', text)
    text = re.sub(r'>', ' ', text)
    text = re.sub(r'<', ' ', text)
    text = re.sub(r'`', ' ', text)
    text = re.sub(r'\'', ' ', text)
    
    if remove_stopwords:
        text = text.split()
        stops = set(stopwords.words("english"))
        text = [w for w in text if not w in stops]
        text = " ".join(text)

    return text

In [137]:
normalization(reviews.Text[713])


Out[137]:
'   http   www amazon com gp product b000gwlugu  plocky s tortilla chips  red beans  n rice  7 ounce bag  pack of 12   a  i first tasted these chips while visiting relatives in ky  they are not available where i live  so i ordered them from amazon  wow  my friends and family are all addicted to them  the spicy flavor grabs you at the first bite  once a bag is open  it is gone '

In [138]:
def clean_reviews(texts):
    return [normalization(text) for text in texts]

In [139]:
summary = clean_reviews(reviews.Summary)
text = clean_reviews(reviews.Text)

In [140]:
print("None count in Summary ",sum(x is None for x in summary))
print("None count in Text ",sum(x is None for x in text))
print(len(summary),len(text))


None count in Summary  0
None count in Text  0
568412 568412

In [141]:
# Count the words in the texts and summaries, dropping words that occur fewer than `threshold` times
def get_word_count(texts,summaries,threshold=20):
    '''
    Params: texts, summaries, threshold (default 20)
    Return: dict of word counts, keeping only words with count >= threshold
    '''
    tokens = []
    for text in texts:
        tokens.extend(text.split())
    for summary in summaries:
        tokens.extend(summary.split())
    counts = Counter(tokens)
    reduced_count = {word:i for word,i in counts.items() if i >= threshold}
    return reduced_count
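
As a quick illustration of the thresholding (a toy corpus, not the review data), words that occur fewer than `threshold` times are dropped from the returned dict:

In [ ]:
# Toy illustration only: with threshold=2, 'food' (count 1) is dropped
toy_counts = get_word_count(["good dog food", "good good coffee"], ["good dog", "coffee"], threshold=2)
print(toy_counts)  # {'good': 4, 'dog': 2, 'coffee': 2}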

In [142]:
count = get_word_count(text,summary)

In [143]:
count


Out[143]:
{'101012': 24,
 'quanities': 22,
 'sift': 141,
 'tisane': 43,
 'cuisine': 1445,
 'plantation': 189,
 'smoothed': 78,
 'horrible': 6934,
 'decaf': 11252,
 'not': 632350,
 'dog': 83544,
 'tastes': 51916,
 ...}

In [144]:
def get_vocab(word_counts):
    '''
    Param: word_counts
    Return: Vocab,vocab_to_int,int_to_vocab
    '''
    vocab = set(word_counts.keys())
    
    vocab_to_int = {}
    int_to_vocab = {}
    
    codes = ["<UNK>","<PAD>","<EOS>","<GO>"]
    for i,code in enumerate(codes):
        vocab_to_int[code] = i

    for i,word in enumerate(vocab,4):
        vocab_to_int[word] = i
        
    int_to_vocab = {i:word for word,i in vocab_to_int.items()}
    return vocab,vocab_to_int,int_to_vocab
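
A quick sanity check (illustrative only): the four special codes occupy indices 0-3 and the real vocabulary words start at 4. Note that iterating a set makes the word-to-index assignment order non-deterministic across runs.

In [ ]:
# Illustrative only: special tokens get ids 0-3, words start at 4
toy_vocab, toy_v2i, toy_i2v = get_vocab({"dog": 25, "food": 40})
print(toy_v2i["<UNK>"], toy_v2i["<PAD>"], toy_v2i["<EOS>"], toy_v2i["<GO>"])  # 0 1 2 3
print(sorted(toy_v2i[w] for w in toy_vocab))                                  # [4, 5]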

In [145]:
vocab,vocab_to_int,int_to_vocab = get_vocab(count)

In [146]:
print(len(vocab),len(vocab_to_int),len(int_to_vocab))


22726 22730 22730

In [147]:
# Using pre-trained Conceptnet Numberbatch's Embeddings (https://github.com/commonsense/conceptnet-numberbatch)
def get_word_embeddings():
    embeddings = {}
    with open('./Datasets/embeddings/numberbatch-en-17.06.txt',encoding='utf-8') as em:
        for embed in em:
            em_line = embed.split(' ')
            if len(em_line) > 2: # skip the header line (number of words, embedding dimension)
                word = em_line[0]
                embedding = np.array(em_line[1:], dtype=np.float32)  # cast the string fields to floats
                embeddings[word] = embedding
    print('Word embeddings:', len(embeddings))
    return embeddings

In [148]:
CN_embeddings = get_word_embeddings()


Word embeddings: 417194

In [149]:
not_in_embeddings = [word for word in vocab if word not in CN_embeddings]

In [150]:
print("No. of words not in Ebeddings : ",len(not_in_embeddings))


No. of words not in Embeddings :  2759

In [151]:
def create_embedding_matrix(int_to_vocab,embeddings,embedding_dim = 300):
    '''
    Params : int_to_vocab, embeddings, embedding_dim
    Return : embedding matrix
    '''
    # Generating empty numpy matrix
    embeding_matrix = np.zeros([len(vocab_to_int),embedding_dim])
    embeding_matrix = embeding_matrix.astype(np.float32)
    
    #Generating random embeddings for words not in CN embeddings
    for i,word in int_to_vocab.items():
        if word in embeddings:
            embeding_matrix[i] = embeddings[word]
        else:
            embeding_matrix[i] = np.random.normal(size=embedding_dim)
    return embeding_matrix

In [152]:
embeding_matrix = create_embedding_matrix(int_to_vocab,CN_embeddings)

In [153]:
print(len(embeding_matrix),len(vocab_to_int))


22730 22730
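
As a hedged sanity check on the matrix built above (illustrative, relying only on the variables already defined in this notebook): rows for words covered by ConceptNet should equal the loaded vectors, while the remaining rows are randomly initialized.

In [ ]:
# Illustrative check: shape of the matrix and one ConceptNet-covered row
print(embeding_matrix.shape)                        # (22730, 300)
covered = next(iter(vocab - set(not_in_embeddings)))
row = embeding_matrix[vocab_to_int[covered]]
print(np.allclose(row, np.asarray(CN_embeddings[covered], dtype=np.float32)))  # True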

In [154]:
def encode_source_target(sources, targets, vocab_to_int):
    '''
    Params : Sources, Targets, vocab_to_int
    Return :encoded_sources, encoded_targets
    '''
    encoded_sources = []
    encoded_targets = []
    for source in sources:
        encod_ent = []
        for word in source.split():
            if word in vocab_to_int:
                encod_ent.append(vocab_to_int[word])
            else:
                encod_ent.append(vocab_to_int["<UNK>"])
        encoded_sources.append(encod_ent)
    
    for target in targets:
        encod_ent = []
        for word in target.split():
            if word in vocab_to_int:
                encod_ent.append(vocab_to_int[word])
            else:
                encod_ent.append(vocab_to_int["<UNK>"])
        encoded_targets.append(encod_ent)
        
    return encoded_sources, encoded_targets
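
A brief illustration (toy strings, not taken from the dataset) of the out-of-vocabulary fallback: unknown words are encoded as the <UNK> id (index 0).

In [ ]:
# Illustrative only: unknown tokens fall back to <UNK>
toy_src, toy_tgt = encode_source_target(["good dog qwzx"], ["good"], vocab_to_int)
print(toy_src, toy_tgt)  # 'qwzx' is almost certainly not in the vocab, so its id is 0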

In [155]:
encoded_sources, encoded_targets = encode_source_target(text,summary,vocab_to_int)

In [156]:
print(len(encoded_sources),len(text))


568412 568412

Model


In [157]:
# Building Input Placeholders
def model_inputs():
    '''
    Returns : input_,target,learning_rate,keep_prob,source_seq_length,target_seq_length,max_target_seq_length
    '''
    input_ = tf.placeholder(dtype=tf.int32,shape=(None,None),name="inputs")
    target = tf.placeholder(dtype=tf.int32,shape=(None,None),name="target")
    
    learning_rate = tf.placeholder(dtype=tf.float32,name="learning_rate")
    keep_prob = tf.placeholder(dtype=tf.float32,name="keep_prob")
    
    source_seq_length = tf.placeholder(dtype=tf.int32,shape=(None,),name="source_seq_length")
    target_seq_length = tf.placeholder(dtype=tf.int32,shape=(None,),name="target_seq_length")
    
    max_target_seq_length = tf.reduce_max(target_seq_length,name="max_target_seq_length")
    return input_,target,learning_rate,keep_prob,source_seq_length,target_seq_length,max_target_seq_length

In [158]:
# Process decoder input: drop the last token of each target row and prepend the <GO> id
def process_decoder_input(target_data,vocab_to_int,batch_size):
    
    strided_target = tf.strided_slice(target_data,(0,0),(batch_size,-1),(1,1))
    go = tf.fill(value=vocab_to_int["<GO>"],dims=(batch_size,1))
    decoder_input = tf.concat((go,strided_target),axis=1)
    return decoder_input
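
process_decoder_input drops the last token of every target row and prepends the <GO> id, so the decoder is fed the summary shifted one step to the right. A NumPy sketch of the same transformation (illustrative only; the ids are made up except for <GO> = 3):

In [ ]:
# NumPy sketch of what process_decoder_input computes (illustrative only)
toy_targets = np.array([[10, 11, 2],      # 2 = <EOS>
                        [12, 13, 2]])
go_col = np.full((toy_targets.shape[0], 1), vocab_to_int["<GO>"])   # a column of 3s
print(np.concatenate([go_col, toy_targets[:, :-1]], axis=1))
# [[ 3 10 11]
#  [ 3 12 13]]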

In [159]:
def encoding_layer(embeded_rnn_input,rnn_size,keep_prob,num_layers,batch_size,source_sequence_length):

    def get_lstm(rnn_size,keep_prob=0.7):
        lstm = tf.contrib.rnn.BasicLSTMCell(rnn_size)
        drop = tf.contrib.rnn.DropoutWrapper(lstm,input_keep_prob=keep_prob)
        return drop
    #     forward lstm layer
    cell_fw = tf.contrib.rnn.MultiRNNCell([get_lstm(rnn_size,keep_prob) for _ in range(num_layers)])

    #     backward lstm layer
    cell_bw = tf.contrib.rnn.MultiRNNCell([get_lstm(rnn_size,keep_prob) for _ in range(num_layers)])
    
    ((encoder_fw_outputs,
              encoder_bw_outputs),
             (encoder_fw_state,
              encoder_bw_state)) = tf.nn.bidirectional_dynamic_rnn(cell_fw=cell_fw,cell_bw=cell_bw,inputs=embeded_rnn_input,
                                    sequence_length=source_sequence_length,dtype=tf.float32)
                                                                     
    encoder_outputs = tf.concat((encoder_fw_outputs, encoder_bw_outputs), 2)
    
    # Concatenate forward and backward states per layer; the resulting state size is 2 * rnn_size
    encoder_states = []
    
    for i in range(num_layers):
        if isinstance(encoder_fw_state[i],tf.contrib.rnn.LSTMStateTuple):
            encoder_state_c = tf.concat(values=(encoder_fw_state[i].c,encoder_bw_state[i].c),axis=1,name="encoder_fw_state_c")
            encoder_state_h = tf.concat(values=(encoder_fw_state[i].h,encoder_bw_state[i].h),axis=1,name="encoder_fw_state_h")
            encoder_state = tf.contrib.rnn.LSTMStateTuple(c=encoder_state_c, h=encoder_state_h)
        elif isinstance(encoder_fw_state[i], tf.Tensor):
            encoder_state = tf.concat(values=(encoder_fw_state[i], encoder_bw_state[i]), axis=1, name='bidirectional_concat')
        
        encoder_states.append(encoder_state)
    
    encoder_states = tuple(encoder_states)
    
    return encoder_outputs,encoder_states
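
Because the encoder is bidirectional, its outputs and states concatenate the forward and backward halves, so their feature size is 2 * rnn_size; the decoder cell below is doubled to match. A static shape check in a throwaway graph (illustrative only, with made-up sizes) shows this without running a session:

In [ ]:
# Illustrative only: build the encoder in a toy graph and inspect static shapes
with tf.Graph().as_default():
    toy_inputs = tf.placeholder(tf.float32, shape=(None, None, 300))
    toy_lengths = tf.placeholder(tf.int32, shape=(None,))
    toy_outputs, toy_states = encoding_layer(toy_inputs, rnn_size=100, keep_prob=0.7,
                                             num_layers=2, batch_size=64,
                                             source_sequence_length=toy_lengths)
    print(toy_outputs.get_shape())      # (?, ?, 200) -> 2 * rnn_size
    print(toy_states[0].c.get_shape())  # (?, 200)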

In [160]:
def training_decoder(dec_embed_input,decoder_cell,encoder_state, output_layer,
                     target_sequence_length,max_target_length):
    
    
    helper = tf.contrib.seq2seq.TrainingHelper(dec_embed_input,target_sequence_length)
    
    decoder = tf.contrib.seq2seq.BasicDecoder(decoder_cell,helper,initial_state=encoder_state,
                                              output_layer=output_layer)
    
    
    (final_outputs, final_state, final_sequence_lengths) = tf.contrib.seq2seq.dynamic_decode(decoder=decoder,impute_finished=True,
                                                     maximum_iterations=max_target_length)
    
    return final_outputs

In [161]:
def inference_decoder(embeddings,decoder_cell,encoder_state,output_layer,vocab_to_int,
                      max_target_length,batch_size):
    
    start_tokens = tf.tile(tf.constant(dtype=tf.int32,value=[vocab_to_int["<GO>"]]),
                           multiples=[batch_size],name="start_tokens")
    
    helper = tf.contrib.seq2seq.GreedyEmbeddingHelper(embeddings,
                                                      start_tokens=start_tokens,
                                                      end_token=vocab_to_int["<EOS>"])
    
    decoder = tf.contrib.seq2seq.BasicDecoder(decoder_cell,helper,initial_state=encoder_state,
                                              output_layer=output_layer)
    
    (final_outputs, final_state, final_sequence_lengths) = tf.contrib.seq2seq.dynamic_decode(decoder,impute_finished=True,
                                                  maximum_iterations=max_target_length)
    return final_outputs

In [162]:
def decoding_layer(target_inputs,encoder_state,embedding,vocab_to_int,rnn_size,target_sequence_length,max_target_length,
                   batch_size,num_layers):
    
    def get_lstm(rnn_size,keep_prob=0.7):
        rnn_size = 2 * rnn_size  # doubled to match the concatenated bidirectional encoder state
        lstm = tf.contrib.rnn.BasicLSTMCell(rnn_size)
        drop = tf.contrib.rnn.DropoutWrapper(lstm,input_keep_prob=keep_prob)
        return drop
    vocab_len = len(vocab_to_int)
    decoder_cell = tf.contrib.rnn.MultiRNNCell([get_lstm(rnn_size) for _ in range(num_layers)])
    output_layer = Dense(vocab_len,kernel_initializer=tf.truncated_normal_initializer(stddev=0.1))
    
    
    embed = tf.nn.embedding_lookup(embedding,target_inputs)
    
    with tf.variable_scope("decoding"):
        
        training_logits = training_decoder(embed,decoder_cell,encoder_state,output_layer,
                                         target_sequence_length,max_target_length)
    
        
    with tf.variable_scope("decoding",reuse=True):
        
        inference_logits = inference_decoder(embedding,decoder_cell,encoder_state,output_layer,vocab_to_int,
                                          max_target_length,batch_size)
    
    return training_logits, inference_logits

In [163]:
def seq2seq_model(source_input,target_input,embeding_matrix,vocab_to_int,source_sequence_length,
                  target_sequence_length,max_target_length, rnn_size,keep_prob,num_layers,batch_size):
    '''
    Params : source_input,target_input,embeding_matrix,vocab_to_int,source_sequence_length,
                  target_sequence_length,max_target_length, rnn_size,keep_prob,num_layers,batch_size
    
    Return : training_logits, inference_logits
    '''
    embeddings = embeding_matrix
    embed = tf.nn.embedding_lookup(embeddings,source_input)
    
    encoder_output,encoder_states = encoding_layer(embed,rnn_size,keep_prob,num_layers,
                                                   batch_size,source_sequence_length)
    
    training_logits, inference_logits = decoding_layer(target_input,encoder_states,embeddings,
                                                                vocab_to_int,rnn_size,target_sequence_length,
                                                                max_target_length,batch_size,num_layers)
    
    return training_logits, inference_logits

Batching


In [164]:
# Sorting the text and summary for better padding
# sort based on the length of the text
def sort_text_summary(texts,summaries):
    text_length = [(i,text,len(text)) for i,text in enumerate(texts)]
    text_length.sort(key=operator.itemgetter(2))
    
    sorted_text = [text for i,text,length in text_length]
    sorted_summary = []
    for i,text,length in text_length:
        sorted_summary.append(summaries[i])
    return sorted_text,sorted_summary

In [165]:
sorted_text, sorted_summary = sort_text_summary(encoded_sources,encoded_targets)

In [166]:
len(sorted_text)


Out[166]:
568412

In [167]:
# Padding batches
def pad_sentence_batch(sentence_batch):
    max_length = max([len(sent) for sent in sentence_batch])
    padded_sentences = []
    for sent in sentence_batch:
        sent_len = len(sent)
        if len(sent) < max_length:
            padded_sentences.append(sent + [vocab_to_int["<PAD>"] for _ in range(max_length - sent_len)])
        else:
            padded_sentences.append(sent)
    return padded_sentences
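
A quick illustration (toy id lists) of the padding: shorter sequences in a batch are extended with the <PAD> id (index 1) up to the length of the longest sequence.

In [ ]:
# Illustrative only: the second sequence is padded with <PAD> (id 1)
print(pad_sentence_batch([[5, 6, 7, 8], [9, 10]]))  # [[5, 6, 7, 8], [9, 10, 1, 1]]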

In [168]:
def get_batches(encoded_sources, encoded_targets, batch_size):
    
    '''
    Params : encoded_sources, encoded_targets, batch_size
    Return : text_batch,summary_batch,source_seq_len,target_seq_len
    '''
    
    sorted_text, sorted_summary = sort_text_summary(encoded_sources,encoded_targets)
    
    batch_count = len(sorted_text)//batch_size
    
    for i in range(batch_count):
        start = i * batch_size
        end = start + batch_size
        
        text_batch = np.array(pad_sentence_batch(sorted_text[start:end]))
        summary_batch = np.array(pad_sentence_batch(sorted_summary[start:end]))
        
        source_seq_len = [len(sent) for sent in text_batch]
        target_seq_len = [len(sent) for sent in summary_batch]
        
        yield (text_batch,summary_batch,source_seq_len,target_seq_len)

In [176]:
# Hyperparameters
epochs = 3
batch_size = 512
rnn_size = 100
num_layers = 3
learn_rate = 0.01
keep_probability = 0.75

#Model save path
save_path = 'models/model'

display_step = 5

In [177]:
# Build Graph

train_graph = tf.Graph()
with train_graph.as_default():
    
    # Load the model inputs   
    input_,target,learning_rate,keep_prob,source_seq_length,target_seq_length,max_target_seq_length = model_inputs()
    
    # Create the training and inference logits
    training_logits, inference_logits = seq2seq_model(input_,target,embeding_matrix,vocab_to_int,source_seq_length,target_seq_length,
                  max_target_seq_length,rnn_size,keep_probability,num_layers,batch_size)
    
    # Create tensors for the training logits and inference logits
    training_logits = tf.identity(training_logits.rnn_output, name='logits')
    inference_logits = tf.identity(inference_logits.sample_id, name='predictions')
    
    masks = tf.sequence_mask(target_seq_length, max_target_seq_length, dtype=tf.float32, name='masks')

    with tf.name_scope("optimization"):
        cost = tf.contrib.seq2seq.sequence_loss(training_logits,target,masks)
        optimizer=tf.train.AdamOptimizer(learning_rate)
        
        # Gradient Clipping
        gradients = optimizer.compute_gradients(cost)
        capped_gradients = [(tf.clip_by_value(grad, -1., 1.), var) for grad, var in gradients if grad is not None]
        train_op = optimizer.apply_gradients(capped_gradients)

Training


In [178]:
# Accuracy
def get_accuracy(target, logits):
    """
    Calculate accuracy
    """
    max_seq = max(target.shape[1], logits.shape[1])
    if max_seq - target.shape[1]:
        target = np.pad(
            target,
            [(0,0),(0,max_seq - target.shape[1])],
            'constant')
    if max_seq - logits.shape[1]:
        logits = np.pad(
            logits,
            [(0,0),(0,max_seq - logits.shape[1])],
            'constant')

    return np.mean(np.equal(target, logits))
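
A small worked example (toy arrays): the narrower matrix is zero-padded to the wider one's width before the element-wise comparison, so matching pad positions also count towards the score.

In [ ]:
# Illustrative only: 3 of the 4 aligned positions match
toy_target = np.array([[4, 5, 6]])
toy_logits = np.array([[4, 5, 9, 0]])
print(get_accuracy(toy_target, toy_logits))  # 0.75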

In [179]:
# Split data to training and validation sets (1 Batch for Validation and rest for Training)
train_source = sorted_text[batch_size:]
train_target = sorted_summary[batch_size:]
valid_source = sorted_text[:batch_size]
valid_target = sorted_summary[:batch_size]

In [180]:
(valid_text_batch,valid_summary_batch,valid_source_seq_len,valid_target_seq_len) = next(get_batches(valid_source,valid_target,batch_size))

In [181]:
print(len(valid_source_seq_len),len(valid_target_seq_len))


512 512

In [ ]:
with tf.Session(graph=train_graph) as sess:
    sess.run(tf.global_variables_initializer())
    
    for epoch_i in range(epochs):
        for batch_i,(text_batch,summary_batch,source_seq_len,target_seq_len) in enumerate(
            get_batches(train_source,train_target,batch_size)):
            
           
            _, loss = sess.run([train_op,cost],
                              feed_dict={
                                  input_ : text_batch,
                                  target : summary_batch,
                                  learning_rate:learn_rate,
                                  keep_prob : keep_probability,
                                  source_seq_length : source_seq_len,
                                  target_seq_length : target_seq_len
                              })
            
            if batch_i % display_step == 0 and batch_i > 0:
                
                batch_train_logits = sess.run(inference_logits,
                                             feed_dict={
                                                 input_: text_batch,
                                                 source_seq_length: source_seq_len,
                                                 target_seq_length: target_seq_len,
                                                 keep_prob: 1.0
                                             })
                
                batch_valid_logits = sess.run(inference_logits,
                                             feed_dict={
                                                 input_: valid_text_batch,
                                                 source_seq_length: valid_source_seq_len,
                                                 target_seq_length: valid_target_seq_len,
                                                 keep_prob: 1.0
                                             })
                
                train_accuracy = get_accuracy(summary_batch,batch_train_logits)
                valid_accuracy = get_accuracy(valid_summary_batch,batch_valid_logits)
                
                print('Epoch {:>3} Batch {:>4}/{} - Train Accuracy: {:>6.4f}, Validation Accuracy: {:>6.4f}, Loss: {:>6.4f}'
                      .format(epoch_i, batch_i, len(sorted_text) // batch_size, train_accuracy, valid_accuracy, loss))
                
    # Save Model
    saver = tf.train.Saver()
    saver.save(sess, save_path)
    print('Model Trained and Saved')


Epoch   0 Batch    5/1110 - Train Accuracy: 0.7544, Validation Accuracy: 0.7522, Loss: 2.0595
Epoch   0 Batch   10/1110 - Train Accuracy: 0.8000, Validation Accuracy: 0.7522, Loss: 1.4508
Epoch   0 Batch   15/1110 - Train Accuracy: 0.7388, Validation Accuracy: 0.7522, Loss: 1.7768
Epoch   0 Batch   20/1110 - Train Accuracy: 0.5776, Validation Accuracy: 0.7258, Loss: 1.9066
Epoch   0 Batch   25/1110 - Train Accuracy: 0.2160, Validation Accuracy: 0.5138, Loss: 1.4715
Epoch   0 Batch   30/1110 - Train Accuracy: 0.5788, Validation Accuracy: 0.6944, Loss: 0.9707
Epoch   0 Batch   35/1110 - Train Accuracy: 0.7399, Validation Accuracy: 0.7314, Loss: 1.0079
Epoch   0 Batch   40/1110 - Train Accuracy: 0.8546, Validation Accuracy: 0.7539, Loss: 0.9211
Epoch   0 Batch   45/1110 - Train Accuracy: 0.8494, Validation Accuracy: 0.7539, Loss: 0.9985
Epoch   0 Batch   50/1110 - Train Accuracy: 0.8483, Validation Accuracy: 0.7539, Loss: 0.9754
Epoch   0 Batch   55/1110 - Train Accuracy: 0.8666, Validation Accuracy: 0.7539, Loss: 0.8379
Epoch   0 Batch   60/1110 - Train Accuracy: 0.8492, Validation Accuracy: 0.7539, Loss: 0.9403
Epoch   0 Batch   65/1110 - Train Accuracy: 0.7548, Validation Accuracy: 0.7539, Loss: 1.5871
Epoch   0 Batch   70/1110 - Train Accuracy: 0.7566, Validation Accuracy: 0.7539, Loss: 1.5764
Epoch   0 Batch   75/1110 - Train Accuracy: 0.7782, Validation Accuracy: 0.7539, Loss: 1.3726
Epoch   0 Batch   80/1110 - Train Accuracy: 0.8158, Validation Accuracy: 0.7539, Loss: 1.1749
Epoch   0 Batch   85/1110 - Train Accuracy: 0.8363, Validation Accuracy: 0.7539, Loss: 1.0665
Epoch   0 Batch   90/1110 - Train Accuracy: 0.7827, Validation Accuracy: 0.7539, Loss: 1.3634
Epoch   0 Batch   95/1110 - Train Accuracy: 0.7389, Validation Accuracy: 0.7539, Loss: 1.6007
Epoch   0 Batch  100/1110 - Train Accuracy: 0.8664, Validation Accuracy: 0.7539, Loss: 0.8570
Epoch   0 Batch  105/1110 - Train Accuracy: 0.8755, Validation Accuracy: 0.7539, Loss: 0.7660
Epoch   0 Batch  110/1110 - Train Accuracy: 0.7396, Validation Accuracy: 0.7539, Loss: 1.5817
Epoch   0 Batch  115/1110 - Train Accuracy: 0.7755, Validation Accuracy: 0.7539, Loss: 1.3788
Epoch   0 Batch  120/1110 - Train Accuracy: 0.7847, Validation Accuracy: 0.7539, Loss: 1.3288
Epoch   0 Batch  125/1110 - Train Accuracy: 0.7357, Validation Accuracy: 0.7539, Loss: 1.6362
Epoch   0 Batch  130/1110 - Train Accuracy: 0.7177, Validation Accuracy: 0.7539, Loss: 1.7406
Epoch   0 Batch  135/1110 - Train Accuracy: 0.7053, Validation Accuracy: 0.7539, Loss: 1.7734
Epoch   0 Batch  140/1110 - Train Accuracy: 0.7637, Validation Accuracy: 0.7539, Loss: 1.3875
Epoch   0 Batch  145/1110 - Train Accuracy: 0.8728, Validation Accuracy: 0.7539, Loss: 0.7323
Epoch   0 Batch  150/1110 - Train Accuracy: 0.7316, Validation Accuracy: 0.7539, Loss: 1.5576
Epoch   0 Batch  155/1110 - Train Accuracy: 0.7920, Validation Accuracy: 0.7539, Loss: 1.2337
Epoch   0 Batch  160/1110 - Train Accuracy: 0.8868, Validation Accuracy: 0.7539, Loss: 0.6674
Epoch   0 Batch  165/1110 - Train Accuracy: 0.8879, Validation Accuracy: 0.7539, Loss: 0.6351
Epoch   0 Batch  170/1110 - Train Accuracy: 0.8423, Validation Accuracy: 0.7539, Loss: 0.9197
Epoch   0 Batch  175/1110 - Train Accuracy: 0.8572, Validation Accuracy: 0.7539, Loss: 0.8311
Epoch   0 Batch  180/1110 - Train Accuracy: 0.8401, Validation Accuracy: 0.7539, Loss: 0.8755
Epoch   0 Batch  185/1110 - Train Accuracy: 0.7535, Validation Accuracy: 0.7539, Loss: 1.3303
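
Once training has finished and the checkpoint exists at save_path, inference could look like the minimal sketch below. It is only a sketch built on assumptions: it relies on the tensor names defined above ('inputs', 'predictions', 'source_seq_length', 'target_seq_length', 'keep_prob'), it tiles a single review to the fixed batch_size the graph was built with, and the summarize helper itself is hypothetical.

In [ ]:
# Minimal inference sketch (assumes the checkpoint saved above at save_path)
def summarize(review_text):
    # Encode the review with the same normalization and vocabulary used for training
    words = normalization(review_text).split()
    ids = [vocab_to_int.get(w, vocab_to_int["<UNK>"]) for w in words]

    loaded_graph = tf.Graph()
    with tf.Session(graph=loaded_graph) as sess:
        loader = tf.train.import_meta_graph(save_path + '.meta')
        loader.restore(sess, save_path)

        inputs = loaded_graph.get_tensor_by_name('inputs:0')
        predictions = loaded_graph.get_tensor_by_name('predictions:0')
        source_len = loaded_graph.get_tensor_by_name('source_seq_length:0')
        target_len = loaded_graph.get_tensor_by_name('target_seq_length:0')
        kp = loaded_graph.get_tensor_by_name('keep_prob:0')

        # The decoder was built for a fixed batch_size, so repeat the single example
        feed = {inputs: [ids] * batch_size,
                source_len: [len(ids)] * batch_size,
                target_len: [len(words)] * batch_size,
                kp: 1.0}
        result = sess.run(predictions, feed_dict=feed)

    pad = vocab_to_int["<PAD>"]
    return " ".join(int_to_vocab[i] for i in result[0] if i != pad)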

In [ ]: