In [1]:
# A frequency distribution takes a list of words and counts how many times each word is used

In [2]:
import nltk

In [3]:
# Load the tokenised text of "Alice's Adventures in Wonderland" from NLTK's
# Gutenberg corpus.
# The corpus data is distributed separately from the nltk package, so on a
# fresh environment the lookup raises LookupError; download it once in that
# case so the notebook survives Restart Kernel -> Run All.
try:
    nltk.data.find('corpora/gutenberg')
except LookupError:
    nltk.download('gutenberg')
alice = nltk.corpus.gutenberg.words('carroll-alice.txt')

In [4]:
# Build the frequency distribution of the tokens: count how many times each one occurs
alice_fd = nltk.FreqDist(alice)

In [5]:
# alice_fd is dictionary-like: each key is a token and its value is the number
# of times that token appears in the text.
# NOTE(review): displaying the whole distribution prints a very long output;
# consider alice_fd.most_common(20) or len(alice_fd) for a shorter summary.
alice_fd


Out[5]:
FreqDist({'[': 3,
          'Alice': 396,
          "'": 1731,
          's': 195,
          'Adventures': 3,
          'in': 357,
          'Wonderland': 3,
          'by': 55,
          'Lewis': 1,
          'Carroll': 1,
          '1865': 1,
          ']': 1,
          'CHAPTER': 12,
          'I': 543,
          '.': 764,
          'Down': 3,
          'the': 1527,
          'Rabbit': 45,
          '-': 141,
          'Hole': 1,
          'was': 352,
          'beginning': 14,
          'to': 725,
          'get': 44,
          'very': 126,
          'tired': 7,
          'of': 500,
          'sitting': 10,
          'her': 243,
          'sister': 9,
          'on': 189,
          'bank': 3,
          ',': 1993,
          'and': 802,
          'having': 10,
          'nothing': 30,
          'do': 68,
          ':': 216,
          'once': 31,
          'or': 76,
          'twice': 5,
          'she': 509,
          'had': 177,
          'peeped': 3,
          'into': 67,
          'book': 11,
          'reading': 3,
          'but': 133,
          'it': 527,
          'no': 69,
          'pictures': 4,
          'conversations': 1,
          'what': 93,
          'is': 97,
          'use': 18,
          'a': 615,
          ",'": 397,
          'thought': 74,
          'without': 26,
          'conversation': 10,
          "?'": 154,
          'So': 27,
          'considering': 3,
          'own': 10,
          'mind': 9,
          '(': 50,
          'as': 246,
          'well': 40,
          'could': 73,
          'for': 140,
          'hot': 7,
          'day': 29,
          'made': 30,
          'feel': 8,
          'sleepy': 5,
          'stupid': 5,
          '),': 8,
          'whether': 11,
          'pleasure': 2,
          'making': 8,
          'daisy': 1,
          'chain': 1,
          'would': 70,
          'be': 145,
          'worth': 4,
          'trouble': 6,
          'getting': 22,
          'up': 98,
          'picking': 2,
          'daisies': 1,
          'when': 69,
          'suddenly': 12,
          'White': 22,
          'with': 175,
          'pink': 1,
          'eyes': 29,
          'ran': 16,
          'close': 13,
          'There': 33,
          'so': 124,
          'VERY': 13,
          'remarkable': 2,
          'that': 275,
          ';': 186,
          'nor': 2,
          'did': 60,
          'think': 50,
          'much': 51,
          'out': 116,
          'way': 56,
          'hear': 14,
          'say': 51,
          'itself': 14,
          'Oh': 35,
          'dear': 28,
          '!': 155,
          'shall': 23,
          'late': 6,
          "!'": 278,
          'over': 40,
          'afterwards': 2,
          'occurred': 2,
          'ought': 14,
          'have': 73,
          'wondered': 1,
          'at': 202,
          'this': 113,
          'time': 68,
          'all': 173,
          'seemed': 27,
          'quite': 53,
          'natural': 4,
          ');': 2,
          'actually': 1,
          'TOOK': 1,
          'A': 17,
          'WATCH': 1,
          'OUT': 1,
          'OF': 3,
          'ITS': 1,
          'WAISTCOAT': 1,
          'POCKET': 1,
          'looked': 45,
          'then': 72,
          'hurried': 11,
          'started': 2,
          'feet': 19,
          'flashed': 1,
          'across': 5,
          'never': 42,
          'before': 36,
          'seen': 15,
          'rabbit': 5,
          'either': 9,
          'waistcoat': 1,
          'pocket': 6,
          'watch': 7,
          'take': 18,
          'burning': 1,
          'curiosity': 5,
          'field': 1,
          'after': 37,
          'fortunately': 1,
          'just': 45,
          'see': 66,
          'pop': 1,
          'down': 99,
          'large': 33,
          'hole': 4,
          'under': 16,
          'hedge': 2,
          'In': 11,
          'another': 22,
          'moment': 31,
          'went': 83,
          'how': 40,
          'world': 7,
          'again': 83,
          'The': 108,
          'straight': 2,
          'like': 84,
          'tunnel': 1,
          'some': 48,
          'dipped': 2,
          'not': 129,
          'about': 94,
          'stopping': 1,
          'herself': 83,
          'found': 30,
          'falling': 2,
          'deep': 7,
          'Either': 1,
          'fell': 6,
          'slowly': 8,
          'plenty': 1,
          'look': 28,
          'wonder': 18,
          'going': 27,
          'happen': 8,
          'next': 28,
          'First': 7,
          'tried': 19,
          'make': 27,
          'coming': 8,
          'too': 25,
          'dark': 3,
          'anything': 19,
          'sides': 4,
          'noticed': 8,
          'they': 129,
          'were': 85,
          'filled': 3,
          'cupboards': 2,
          'shelves': 2,
          'here': 39,
          'there': 65,
          'saw': 14,
          'maps': 1,
          'hung': 1,
          'upon': 26,
          'pegs': 1,
          'She': 39,
          'took': 23,
          'jar': 2,
          'from': 34,
          'one': 94,
          'passed': 5,
          'labelled': 1,
          'ORANGE': 1,
          'MARMALADE': 1,
          "',": 2,
          'great': 39,
          'disappointment': 1,
          'empty': 1,
          'drop': 1,
          'fear': 4,
          'killing': 1,
          'somebody': 5,
          'managed': 4,
          'put': 31,
          'past': 3,
          'Well': 23,
          'such': 40,
          'fall': 7,
          'tumbling': 2,
          'stairs': 3,
          'How': 27,
          'brave': 1,
          'll': 56,
          'me': 61,
          'home': 5,
          'Why': 27,
          'wouldn': 12,
          't': 216,
          'even': 17,
          'if': 78,
          'off': 62,
          'top': 8,
          'house': 18,
          'Which': 8,
          'likely': 5,
          'true': 3,
          '.)': 16,
          'Would': 8,
          'NEVER': 2,
          'come': 25,
          'an': 52,
          'end': 17,
          'many': 12,
          'miles': 3,
          've': 43,
          'fallen': 4,
          'said': 456,
          'aloud': 5,
          'must': 41,
          'somewhere': 2,
          'near': 14,
          'centre': 1,
          'earth': 4,
          'Let': 11,
          'four': 8,
          'thousand': 2,
          "--'": 51,
          'you': 345,
          'learnt': 2,
          'several': 4,
          'things': 31,
          'sort': 20,
          'lessons': 10,
          'schoolroom': 1,
          'though': 10,
          'good': 26,
          'opportunity': 8,
          'showing': 2,
          'knowledge': 3,
          'listen': 7,
          'still': 12,
          'practice': 1,
          ')': 8,
          "'--": 18,
          'yes': 1,
          'right': 30,
          'distance': 8,
          '--': 140,
          'Latitude': 2,
          'Longitude': 2,
          'got': 45,
          'idea': 15,
          'nice': 6,
          'grand': 3,
          'words': 21,
          'Presently': 2,
          'began': 58,
          'THROUGH': 1,
          'funny': 3,
          'seem': 8,
          'among': 12,
          'people': 13,
          'walk': 5,
          'their': 50,
          'heads': 9,
          'downward': 1,
          'Antipathies': 1,
          'rather': 25,
          'glad': 11,
          'WAS': 4,
          'listening': 3,
          'didn': 14,
          'sound': 4,
          'word': 10,
          'ask': 11,
          'them': 88,
          'name': 10,
          'country': 1,
          'know': 87,
          'Please': 5,
          'Ma': 2,
          'am': 16,
          'New': 1,
          'Zealand': 1,
          'Australia': 1,
          'curtsey': 1,
          'spoke': 17,
          'fancy': 7,
          'CURTSEYING': 1,
          're': 38,
          'through': 13,
          'air': 15,
          'Do': 13,
          'manage': 7,
          '?)': 1,
          'And': 67,
          'ignorant': 1,
          'little': 125,
          'girl': 4,
          'asking': 5,
          'No': 20,
          'perhaps': 14,
          'written': 6,
          ".'": 187,
          'else': 12,
          'soon': 24,
          'talking': 16,
          'Dinah': 14,
          'miss': 1,
          'night': 5,
          'should': 27,
          'cat': 11,
          'hope': 3,
          'remember': 14,
          'saucer': 1,
          'milk': 2,
          'tea': 18,
          'my': 55,
          'wish': 21,
          'are': 44,
          'mice': 4,
          'm': 58,
          'afraid': 12,
          'might': 28,
          'catch': 3,
          'bat': 3,
          'mouse': 14,
          'But': 37,
          'cats': 13,
          'eat': 17,
          'bats': 4,
          'saying': 15,
          'dreamy': 1,
          '?': 35,
          'sometimes': 5,
          'couldn': 9,
          'answer': 9,
          'question': 17,
          'matter': 9,
          'which': 41,
          'felt': 23,
          'dozing': 1,
          'begun': 7,
          'dream': 7,
          'walking': 5,
          'hand': 20,
          'earnestly': 2,
          'Now': 12,
          'tell': 27,
          'truth': 1,
          'ever': 20,
          'thump': 2,
          'came': 40,
          'heap': 1,
          'sticks': 1,
          'dry': 8,
          'leaves': 6,
          'bit': 16,
          'hurt': 3,
          'jumped': 6,
          'overhead': 1,
          'long': 31,
          'passage': 4,
          'sight': 10,
          'hurrying': 1,
          'lost': 3,
          'away': 25,
          'wind': 2,
          'turned': 16,
          'corner': 4,
          'ears': 5,
          'whiskers': 3,
          'behind': 13,
          'longer': 3,
          'low': 15,
          'hall': 9,
          'lit': 1,
          'row': 2,
          'lamps': 1,
          'hanging': 3,
          'roof': 6,
          'doors': 2,
          'round': 41,
          'locked': 1,
          'been': 38,
          'side': 17,
          'other': 40,
          'trying': 14,
          'every': 12,
          'door': 30,
          'walked': 10,
          'sadly': 5,
          'middle': 7,
          'wondering': 7,
          'Suddenly': 1,
          'three': 28,
          'legged': 2,
          'table': 17,
          'solid': 1,
          'glass': 10,
          'except': 4,
          'tiny': 4,
          'golden': 7,
          'key': 9,
          'first': 44,
          'belong': 1,
          'alas': 3,
          'locks': 2,
          'small': 10,
          'any': 39,
          'rate': 9,
          'open': 7,
          'However': 13,
          'second': 4,
          'curtain': 1,
          'fifteen': 1,
          'inches': 6,
          'high': 15,
          'lock': 1,
          'delight': 3,
          'fitted': 1,
          'opened': 10,
          'led': 4,
          'larger': 7,
          'than': 23,
          'rat': 1,
          'knelt': 1,
          'along': 6,
          'loveliest': 1,
          'garden': 16,
          'longed': 2,
          'wander': 1,
          'those': 9,
          'beds': 2,
          'bright': 8,
          'flowers': 2,
          'cool': 2,
          'fountains': 2,
          'head': 50,
          'doorway': 1,
          'go': 47,
          'poor': 25,
          'shoulders': 4,
          'shut': 5,
          'telescope': 3,
          'only': 47,
          'begin': 12,
          'For': 13,
          'happened': 7,
          'lately': 1,
          'few': 9,
          'indeed': 15,
          'really': 11,
          'impossible': 3,
          'waiting': 8,
          'back': 38,
          'half': 22,
          'hoping': 3,
          'find': 21,
          'rules': 3,
          'shutting': 2,
          'telescopes': 1,
          'bottle': 10,
          "('": 5,
          'certainly': 13,
          ',)': 9,
          'neck': 7,
          'paper': 4,
          'label': 2,
          'DRINK': 2,
          'ME': 7,
          'beautifully': 2,
          'printed': 1,
          'letters': 1,
          'It': 64,
          'Drink': 1,
          'wise': 2,
          'THAT': 13,
          'hurry': 11,
          'marked': 6,
          '"': 48,
          'poison': 3,
          "';": 5,
          'read': 10,
          'histories': 1,
          'children': 10,
          'who': 48,
          'burnt': 1,
          'eaten': 1,
          'wild': 2,
          'beasts': 2,
          'unpleasant': 2,
          'because': 14,
          'WOULD': 5,
          'simple': 5,
          'friends': 2,
          'taught': 4,
          'red': 2,
          'poker': 1,
          'will': 24,
          'burn': 2,
          'hold': 6,
          'cut': 5,
          'your': 53,
          'finger': 5,
          'deeply': 4,
          'knife': 3,
          'usually': 2,
          'bleeds': 1,
          'forgotten': 6,
          'drink': 4,
          'almost': 6,
          'certain': 3,
          'disagree': 1,
          'sooner': 2,
          'later': 3,
          'NOT': 7,
          'ventured': 4,
          'taste': 2,
          'finding': 3,
          'fact': 8,
          'mixed': 2,
          'flavour': 1,
          'cherry': 1,
          'tart': 1,
          'custard': 1,
          'pine': 1,
          'apple': 1,
          'roast': 1,
          'turkey': 1,
          'toffee': 1,
          'buttered': 1,
          'toast': 1,
          'finished': 12,
          '*': 60,
          'What': 42,
          'curious': 19,
          'feeling': 7,
          'now': 48,
          'ten': 5,
          'face': 15,
          'brightened': 2,
          'size': 13,
          'lovely': 2,
          'however': 7,
          'waited': 11,
          'minutes': 11,
          'shrink': 1,
          'further': 3,
          'nervous': 5,
          'altogether': 5,
          'candle': 3,
          'flame': 1,
          'blown': 1,
          'thing': 49,
          'After': 6,
          'while': 20,
          'more': 47,
          'decided': 3,
          'possibly': 3,
          'reach': 4,
          'plainly': 1,
          'best': 10,
          'climb': 1,
          'legs': 3,
          'slippery': 1,
          'sat': 17,
          'cried': 20,
          'Come': 21,
          'crying': 2,
          'sharply': 4,
          'advise': 1,
          'leave': 7,
          'minute': 21,
          'generally': 7,
          'gave': 13,
          'advice': 1,
          'seldom': 1,
          'followed': 8,
          'scolded': 1,
          'severely': 4,
          'bring': 2,
          'tears': 10,
          'remembered': 5,
          'box': 10,
          'cheated': 1,
          'game': 12,
          'croquet': 8,
          'playing': 2,
          'against': 9,
          'child': 11,
          'fond': 4,
          'pretending': 1,
          'two': 32,
          'pretend': 1,
          'hardly': 11,
          'enough': 18,
          'left': 14,
          'ONE': 4,
          'respectable': 1,
          'person': 4,
          'Soon': 1,
          'eye': 7,
          'lying': 8,
          'cake': 3,
          'EAT': 1,
          'currants': 1,
          'makes': 11,
          'grow': 13,
          'can': 57,
          'smaller': 3,
          'creep': 1,
          'don': 51,
          'care': 4,
          'happens': 5,
          'ate': 1,
          'anxiously': 14,
          "?',": 1,
          'holding': 3,
          'growing': 11,
          'surprised': 7,
          'remained': 3,
          'same': 23,
          'sure': 20,
          'eats': 1,
          'expecting': 3,
          'dull': 3,
          'life': 12,
          'common': 1,
          'set': 14,
          'work': 8,
          'II': 1,
          'Pool': 1,
          'Tears': 1,
          'Curiouser': 1,
          'curiouser': 1,
          'forgot': 2,
          'speak': 13,
          'English': 6,
          'opening': 3,
          'largest': 1,
          'Good': 1,
          'bye': 2,
          'far': 13,
          ').': 4,
          'shoes': 6,
          'stockings': 1,
          'dears': 3,
          '_I_': 2,
          'shan': 5,
          'able': 1,
          'deal': 12,
          'myself': 6,
          ';--': 1,
          'kind': 7,
          'won': 26,
          'want': 9,
          'give': 9,
          'new': 4,
          'pair': 5,
          'boots': 2,
          'Christmas': 1,
          'planning': 1,
          'They': 21,
          'carrier': 1,
          'sending': 2,
          'presents': 2,
          'odd': 1,
          'directions': 3,
          'ALICE': 2,
          'S': 6,
          'RIGHT': 1,
          'FOOT': 1,
          'ESQ': 1,
          'HEARTHRUG': 1,
          'NEAR': 1,
          'THE': 7,
          'FENDER': 1,
          'WITH': 1,
          'LOVE': 1,
          'nonsense': 6,
          'Just': 7,
          'struck': 2,
          'nine': 5,
          'Poor': 2,
          'hopeless': 1,
          'cry': 3,
          'You': 51,
          'ashamed': 2,
          'yourself': 10,
          'Stop': 1,
          'shedding': 1,
          'gallons': 1,
          'until': 5,
          'pool': 10,
          'reaching': 1,
          'heard': 30,
          'pattering': 3,
          'hastily': 16,
          'dried': 1,
          'returning': 1,
          'splendidly': 1,
          'dressed': 1,
          'white': 8,
          'kid': 5,
          'gloves': 11,
          'fan': 10,
          'he': 101,
          'trotting': 2,
          'muttering': 3,
          'himself': 6,
          'Duchess': 42,
          'savage': 4,
          'kept': 13,
          'desperate': 1,
          'ready': 8,
          'help': 9,
          'timid': 3,
          'voice': 47,
          'If': 16,
          'please': 13,
          'sir': 6,
          'violently': 4,
          'dropped': 5,
          'skurried': 1,
          'darkness': 1,
          'hard': 8,
          'fanning': 1,
          'Dear': 1,
          'queer': 12,
          'everything': 12,
          'yesterday': 3,
          'usual': 5,
          'changed': 8,
          'morning': 5,
          'different': 9,
          'Who': 15,
          'Ah': 5,
          'puzzle': 1,
          'thinking': 10,
          'knew': 14,
          'age': 4,
          'Ada': 1,
          'hair': 7,
          'goes': 7,
          'ringlets': 2,
          'mine': 7,
          'doesn': 16,
          'Mabel': 4,
          'sorts': 3,
          'oh': 10,
          'knows': 2,
          'Besides': 1,
          'SHE': 5,
          'puzzling': 4,
          'try': 12,
          'used': 13,
          'times': 6,
          'five': 1,
          'twelve': 4,
          'six': 2,
          'thirteen': 1,
          'seven': 1,
          'twenty': 2,
          'Multiplication': 1,
          'Table': 1,
          'signify': 1,
          'let': 11,
          'Geography': 1,
          'London': 1,
          'capital': 4,
          'Paris': 2,
          'Rome': 2,
          'wrong': 5,
          'doth': 2,
          '--"\'': 4,
          'crossed': 3,
          'hands': 12,
          'lap': 2,
          'repeat': 6,
          'sounded': 5,
          'hoarse': 3,
          'strange': 5,
          ':--': 15,
          'crocodile': 1,
          'Improve': 1,
          'his': 94,
          'shining': 1,
          'tail': 9,
          'pour': 1,
          'waters': 1,
          'Nile': 1,
          'On': 4,
          'scale': 1,
          'cheerfully': 1,
          'seems': 5,
          'grin': 6,
          'neatly': 2,
          'spread': 3,
          'claws': 2,
          'welcome': 1,
          'fishes': 1,
          'With': 4,
          'gently': 3,
          'smiling': 2,
          'jaws': 2,
          'live': 8,
          'poky': 1,
          'toys': 1,
          'play': 8,
          'learn': 7,
          'stay': 5,
          'putting': 3,
          '!"': 6,
          'Tell': 5,
          'being': 19,
          'till': 21,
          '"--': 4,
          'sudden': 5,
          'burst': 1,
          'alone': 4,
          'As': 17,
          'CAN': 4,
          'done': 15,
          'measure': 1,
          'nearly': 10,
          'guess': 3,
          'shrinking': 4,
          'rapidly': 2,
          'cause': 3,
          'avoid': 1,
          'That': 27,
          'narrow': 2,
          'escape': 4,
          'frightened': 7,
          'change': 13,
          'existence': 1,
          'speed': 1,
          'worse': 3,
          'declare': 2,
          'bad': 2,
          'these': 11,
          'foot': 9,
          'slipped': 3,
          'splash': 1,
          'chin': 7,
          'salt': 2,
          'water': 5,
          'Her': 3,
          'somehow': 1,
          'sea': 14,
          'case': 5,
          'railway': 2,
          'seaside': 1,
          'general': 3,
          'conclusion': 2,
          'wherever': 2,
          'coast': 1,
          'number': 4,
          'bathing': 1,
          'machines': 1,
          'digging': 2,
          'sand': 1,
          'wooden': 1,
          'spades': 1,
          'lodging': 1,
          'houses': 1,
          'station': 1,
          'wept': 1,
          'hadn': 7,
          'swam': 5,
          'punished': 1,
          'suppose': 12,
          'drowned': 1,
          'WILL': 3,
          'something': 17,
          'splashing': 2,
          'nearer': 5,
          'walrus': 1,
          'hippopotamus': 1,
          'Everything': 2,
          'talk': 14,
          'harm': 1,
          'O': 3,
          'Mouse': 30,
          'swimming': 2,
          'speaking': 5,
          'brother': 1,
          'Latin': 1,
          'Grammar': 1,
          "!')": 1,
          'inquisitively': 1,
          'wink': 2,
          'its': 56,
          'Perhaps': 3,
          'understand': 6,
          'daresay': 1,
          'French': 4,
          'William': 6,
          'Conqueror': 2,
          'history': 7,
          'clear': 2,
          'notion': 3,
          'ago': 2,
          'Ou': 1,
          'est': 1,
          'ma': 1,
          'chatte': 1,
          'sentence': 5,
          'lesson': 3,
          'leap': 1,
          'quiver': 1,
          'fright': 2,
          'beg': 7,
          'pardon': 6,
          'animal': 2,
          'feelings': 2,
          'Not': 9,
          'shrill': 5,
          'passionate': 1,
          'YOU': 15,
          'soothing': 1,
          'tone': 40,
          'angry': 5,
          'yet': 21,
          'show': 3,
          'our': 7,
          'd': 27,
          'quiet': 2,
          'lazily': 1,
          'sits': 1,
          'purring': 2,
          'nicely': 2,
          'fire': 4,
          'licking': 1,
          'paws': 4,
          'washing': 2,
          'soft': 1,
          'nurse': 3,
          'catching': 2,
          'bristling': 1,
          'offended': 10,
          'We': 10,
          'trembling': 6,
          ...})

In [6]:
# Find the most common tokens in the frequency distribution.
# .most_common(n) returns the n most frequent tokens; 15 is passed here to get the top 15.
# The result is a list of (token, count) tuples, ordered from highest to lowest count.
# Observation: these tokens are mostly punctuation and very common words, so they
# don't tell us much about the book; let's try the rare words next.
alice_fd.most_common(15)


Out[6]:
[(',', 1993),
 ("'", 1731),
 ('the', 1527),
 ('and', 802),
 ('.', 764),
 ('to', 725),
 ('a', 615),
 ('I', 543),
 ('it', 527),
 ('she', 509),
 ('of', 500),
 ('said', 456),
 (",'", 397),
 ('Alice', 396),
 ('in', 357)]

In [7]:
# Find the tokens that occurred only once in the book (hapaxes)
# using the .hapaxes() method.
# These words are more descriptive, but even so they don't tell us much about the book.
# NOTE(review): the full list is long; consider slicing it
# (e.g. alice_fd.hapaxes()[:20]) to keep the output short.
alice_fd.hapaxes()


Out[7]:
['Lewis',
 'Carroll',
 '1865',
 ']',
 'Hole',
 'conversations',
 'daisy',
 'chain',
 'daisies',
 'pink',
 'wondered',
 'actually',
 'TOOK',
 'WATCH',
 'OUT',
 'ITS',
 'WAISTCOAT',
 'POCKET',
 'flashed',
 'waistcoat',
 'burning',
 'field',
 'fortunately',
 'pop',
 'tunnel',
 'stopping',
 'Either',
 'plenty',
 'maps',
 'hung',
 'pegs',
 'labelled',
 'ORANGE',
 'MARMALADE',
 'disappointment',
 'empty',
 'drop',
 'killing',
 'brave',
 'centre',
 'schoolroom',
 'practice',
 'yes',
 'THROUGH',
 'downward',
 'Antipathies',
 'country',
 'New',
 'Zealand',
 'Australia',
 'curtsey',
 'CURTSEYING',
 '?)',
 'ignorant',
 'miss',
 'saucer',
 'dreamy',
 'dozing',
 'truth',
 'heap',
 'sticks',
 'overhead',
 'hurrying',
 'lit',
 'lamps',
 'locked',
 'Suddenly',
 'solid',
 'belong',
 'curtain',
 'fifteen',
 'lock',
 'fitted',
 'rat',
 'knelt',
 'loveliest',
 'wander',
 'doorway',
 'lately',
 'telescopes',
 'printed',
 'letters',
 'Drink',
 'histories',
 'burnt',
 'eaten',
 'poker',
 'bleeds',
 'disagree',
 'flavour',
 'cherry',
 'tart',
 'custard',
 'pine',
 'apple',
 'roast',
 'turkey',
 'toffee',
 'buttered',
 'toast',
 'shrink',
 'flame',
 'blown',
 'plainly',
 'climb',
 'slippery',
 'advise',
 'advice',
 'seldom',
 'scolded',
 'cheated',
 'pretending',
 'pretend',
 'respectable',
 'Soon',
 'EAT',
 'currants',
 'creep',
 'ate',
 "?',",
 'eats',
 'common',
 'II',
 'Pool',
 'Tears',
 'Curiouser',
 'curiouser',
 'largest',
 'Good',
 'stockings',
 'able',
 ';--',
 'Christmas',
 'planning',
 'carrier',
 'odd',
 'RIGHT',
 'FOOT',
 'ESQ',
 'HEARTHRUG',
 'NEAR',
 'FENDER',
 'WITH',
 'LOVE',
 'hopeless',
 'Stop',
 'shedding',
 'gallons',
 'reaching',
 'dried',
 'returning',
 'splendidly',
 'dressed',
 'desperate',
 'skurried',
 'darkness',
 'fanning',
 'Dear',
 'puzzle',
 'Ada',
 'Besides',
 'five',
 'thirteen',
 'seven',
 'Multiplication',
 'Table',
 'signify',
 'Geography',
 'London',
 'crocodile',
 'Improve',
 'shining',
 'pour',
 'waters',
 'Nile',
 'scale',
 'cheerfully',
 'welcome',
 'fishes',
 'poky',
 'toys',
 'burst',
 'measure',
 'avoid',
 'existence',
 'speed',
 'splash',
 'somehow',
 'seaside',
 'coast',
 'bathing',
 'machines',
 'sand',
 'wooden',
 'spades',
 'lodging',
 'houses',
 'station',
 'wept',
 'punished',
 'drowned',
 'walrus',
 'hippopotamus',
 'harm',
 'brother',
 'Latin',
 'Grammar',
 "!')",
 'inquisitively',
 'daresay',
 'Ou',
 'est',
 'ma',
 'chatte',
 'leap',
 'quiver',
 'passionate',
 'soothing',
 'lazily',
 'sits',
 'licking',
 'soft',
 'bristling',
 'Our',
 'family',
 'HATED',
 'nasty',
 'vulgar',
 'eyed',
 'terrier',
 'curly',
 'farmer',
 'hundred',
 'pounds',
 'kills',
 'rats',
 'commotion',
 'softly',
 'III',
 'Race',
 'Long',
 'Tale',
 'draggled',
 'feathers',
 'clinging',
 'dripping',
 'consultation',
 'familiarly',
 'known',
 'Indeed',
 'positively',
 'refused',
 'Sit',
 'LL',
 'fixed',
 'cold',
 'Ahem',
 'driest',
 'favoured',
 'pope',
 'submitted',
 'leaders',
 'accustomed',
 'usurpation',
 'conquest',
 'shiver',
 'declared',
 'Stigand',
 'patriotic',
 'Canterbury',
 'crossly',
 'worm',
 '\'"--',
 'Edgar',
 'Atheling',
 'conduct',
 'moderate',
 'insolence',
 'Normans',
 'rising',
 'meeting',
 'adjourn',
 'immediate',
 'adoption',
 'energetic',
 'remedies',
 'bent',
 'hide',
 'tittered',
 'audibly',
 'paused',
 'SOMEBODY',
 'inclined',
 'winter',
 'circle',
 'exact',
 'shape',
 'placed',
 'Shakespeare',
 'EVERYBODY',
 'calling',
 'despair',
 'pulled',
 'luckily',
 'prize',
 'Hand',
 'presented',
 'acceptance',
 'elegant',
 'laugh',
 'complained',
 'theirs',
 'ones',
 'patted',
 'begged',
 'promised',
 'C',
 'Mine',
 'prosecute',
 'denial',
 'cur',
 'Such',
 'Sir',
 'cunning',
 'condemn',
 'death',
 'fifth',
 'undo',
 'insult',
 'growled',
 'quicker',
 'daughter',
 'lose',
 'snappishly',
 'patience',
 'oyster',
 'addressing',
 'pet',
 'Magpie',
 'wrapping',
 'Canary',
 'bed',
 'various',
 'pretexts',
 'spirited',
 'IV',
 'Sends',
 'Little',
 'naturedly',
 'completely',
 'Run',
 'Quick',
 'pointed',
 'housemaid',
 'finds',
 'neat',
 'brass',
 'W',
 'RABBIT',
 'engraved',
 'upstairs',
 'lest',
 'fancying',
 'Coming',
 'tidy',
 'hoped',
 'pairs',
 'nevertheless',
 'uncorked',
 'lips',
 'SOMETHING',
 'whenever',
 'expected',
 'pressing',
 'ceiling',
 'save',
 'Alas',
 'kneel',
 'Still',
 'resource',
 'Luckily',
 'magic',
 'grew',
 'pleasanter',
 'rabbits',
 'fairy',
 'tales',
 'HERE',
 'comfort',
 'foolish',
 'Fetch',
 'inwards',
 'attempt',
 'failure',
 'possible',
 'frame',
 'Sounds',
 'pronounced',
 'arrum',
 ".')",
 'fills',
 'whispers',
 'coward',
 'shrieks',
 'frames',
 'pulling',
 'rumbling',
 'cartwheels',
 'ladder',
 'lad',
 'tie',
 'rope',
 'loose',
 'Heads',
 'Nay',
 'Shy',
 'fireplace',
 'scratching',
 'scrambling',
 'Catch',
 'Brandy',
 'choke',
 'Last',
 'thank',
 'ye',
 'flustered',
 'Jack',
 'rocket',
 'produced',
 'SOME',
 'swallowed',
 'thick',
 'arranged',
 'peering',
 'enormous',
 'feebly',
 'stretching',
 'touch',
 'whistle',
 'terribly',
 'spite',
 'Hardly',
 'whereupon',
 'yelp',
 'rushed',
 'worry',
 'dodged',
 'tumbled',
 'heels',
 'cart',
 'horse',
 'trampled',
 'series',
 'charges',
 'forwards',
 'barking',
 'hoarsely',
 'faint',
 'leant',
 'buttercup',
 'fanned',
 'teaching',
 'tricks',
 'blades',
 'circumstances',
 'caterpillar',
 'V',
 'Advice',
 'languid',
 'shyly',
 'sternly',
 'MYSELF',
 'clearly',
 'sizes',
 'chrysalis',
 'butterfly',
 'irritated',
 'state',
 'promising',
 'swallowing',
 'wait',
 'puffed',
 'HOW',
 'DOTH',
 'BUSY',
 'BEE',
 'Repeat',
 'incessantly',
 'son',
 'feared',
 'injure',
 'brain',
 'uncommonly',
 'fat',
 'sage',
 'grey',
 'limbs',
 'supple',
 'ointment',
 'shilling',
 'Allow',
 'couple',
 'tougher',
 'suet',
 'bones',
 'beak',
 'argued',
 'wife',
 'muscular',
 'strength',
 'jaw',
 'Has',
 'steady',
 'balanced',
 'awfully',
 'airs',
 'altered',
 'DON',
 'contradicted',
 'losing',
 'content',
 'rearing',
 'upright',
 'piteous',
 'crawled',
 'underneath',
 'closely',
 'swallow',
 'morsel',
 'immense',
 'length',
 'rise',
 'stalk',
 'result',
 'curving',
 'graceful',
 'zigzag',
 'dive',
 'tops',
 'hiss',
 'pigeon',
 'flown',
 'wings',
 'subdued',
 'sob',
 'banks',
 'hedges',
 'pleasing',
 'hatching',
 'weeks',
 'sorry',
 'annoyed',
 'highest',
 'raising',
 'needs',
 'wriggling',
 'invent',
 'deepest',
 'contempt',
 'denying',
 'egg',
 'truthful',
 'adding',
 'YOURS',
 'raw',
 'nest',
 'crouched',
 'untwist',
 'Whoever',
 'frighten',
 'wits',
 'righthand',
 'VI',
 '--(',
 'judging',
 ')--',
 'rapped',
 'knuckles',
 'footmen',
 'powdered',
 'crept',
 'producing',
 'Frog',
 'From',
 'curls',
 'stupidly',
 'knocked',
 'reasons',
 'inside',
 'kettle',
 'INSIDE',
 'knock',
 'uncivil',
 'tomorrow',
 'skimming',
 'grazed',
 'maybe',
 'louder',
 'argue',
 'drive',
 'crazy',
 'variations',
 'Anything',
 'whistling',
 'desperately',
 'idiotic',
 'smoke',
 'stool',
 'Even',
 'sneezed',
 'occasionally',
 'alternately',
 'hearth',
 'grinning',
 'manners',
 'grins',
 'violence',
 'fix',
 'irons',
 'saucepans',
 'blows',
 'PLEASE',
 'agony',
 'terror',
 'PRECIOUS',
 'unusually',
 'saucepan',
 'flew',
 'minded',
 'axis',
 'Talking',
 'axes',
 'chop',
 'glanced',
 'Twenty',
 'bother',
 'abide',
 'figures',
 'lullaby',
 'shake',
 'roughly',
 'annoy',
 'Because',
 'teases',
 '):--',
 'howled',
 'enjoy',
 'pleases',
 'flinging',
 'frying',
 'pan',
 'star',
 'snorting',
 'steam',
 'engine',
 'doubling',
 'straightening',
 'tight',
 'prevent',
 'undoing',
 'kill',
 'murder',
 'grunt',
 'expressing',
 'snout',
 'seriously',
 'sobbed',
 'NO',
 'carry',
 'relieved',
 'trot',
 'handsome',
 'seeing',
 'bough',
 'yards',
 'natured',
 'teeth',
 'treated',
 'respect',
 'Puss',
 'wider',
 'depends',
 'SOMEWHERE',
 'Visit',
 'To',
 'grant',
 'growls',
 'wags',
 'wag',
 'Therefore',
 'growling',
 'happening',
 'hatters',
 'branch',
 'fig',
 'appearing',
 'vanishing',
 'farther',
 'chimneys',
 'thatched',
 'towards',
 'VII',
 'Mad',
 'Tea',
 'Party',
 'elbows',
 'PLENTY',
 'chair',
 'cutting',
 'severity',
 'raven',
 'desk',
 'ravens',
 'desks',
 'fourth',
 'works',
 'grumbled',
 'gloomily',
 'readily',
 'stays',
 'poured',
 'riddle',
 'slightest',
 'Nor',
 'wearily',
 'waste',
 'answers',
 'accounts',
 'terms',
 'Half',
 'mournfully',
 'quarrelled',
 'given',
 'Like',
 'tray',
 'pinch',
 'bawled',
 'murdering',
 'mournful',
 'whiles',
 'vote',
 'alarmed',
 'proposal',
 'fellows',
 'quick',
 'Elsie',
 'Lacie',
 'Tillie',
 'interest',
 'eating',
 'drinking',
 'ways',
 'LESS',
 'opinion',
 'helped',
 'Sh',
 'sh',
 'interrupt',
 'consented',
 'promise',
 'clean',
 'unwillingly',
 'jug',
 'offend',
 'eh',
 'IN',
 'choosing',
 'doze',
 'woke',
 'traps',
 'moon',
 'memory',
 'drawing',
 'rudeness',
 'disgust',
 'teapot',
 'THERE',
 'stupidest',
 'leading',
 'today',
 'unlocking',
 'THEN',
 'VIII',
 'Croquet',
 'Ground',
 'entrance',
 'Look',
 'paint',
 'jogged',
 'Always',
 'blame',
 'deserved',
 'spoken',
 'tulip',
 'onions',
 'flung',
 'brush',
 'unjust',
 'chanced',
 'RED',
 'afore',
 'clubs',
 'oblong',
 'corners',
 'diamonds',
 'merrily',
 'couples',
 'hearts',
 'Kings',
 'Queens',
 'recognised',
 'noticing',
 'velvet',
 'KING',
 'QUEEN',
 'HEARTS',
 'processions',
 'opposite',
 'Idiot',
 'THESE',
 'rosetree',
 'pattern',
 'backs',
 'fury',
 'glaring',
 'beast',
 'Nonsense',
 'bowing',
 'Leave',
 'humble',
 'meanwhile',
 'examining',
 'remaining',
 'execute',
 'protection',
 'pot',
 'marched',
 'Their',
 'evidently',
 'roared',
 'peeping',
 '!"?\'',
 'boxed',
 'laughter',
 'hush',
 'thunder',
 'ridges',
 'furrows',
 'balls',
 'mallets',
 'double',
 'chief',
 'managing',
 'comfortably',
 'straightened',
 'expression',
 'bursting',
 'laughing',
 'provoking',
 'act',
 'crawling',
 'ridge',
 'furrow',
 'send',
 'doubled',
 'parts',
 'played',
 'fighting',
 'furious',
 'uneasy',
 'beheading',
 'appearance',
 'nodded',
 'account',
 'someone',
 'fairly',
 'complaining',
 'quarrel',
 'oneself',
 'attends',
 'arch',
 'croqueted',
 'win',
 'finishing',
 'kiss',
 'likes',
 'impertinent',
 'king',
 'passing',
 'settling',
 'difficulties',
 'screaming',
 'search',
 'engaged',
 'croqueting',
 'helpless',
 'appealed',
 'settle',
 'arguments',
 'HIS',
 'weren',
 'prison',
 'arrow',
 'fading',
 'IX',
 'Story',
 'affectionately',
 'pleasant',
 'hopeful',
 'AT',
 'Maybe',
 'vinegar',
 'sour',
 'camomile',
 'bitter',
 'barley',
 'sweet',
 'stingy',
 'Tut',
 'tut',
 'squeezed',
 'closer',
 'uncomfortably',
 'bore',
 'Somebody',
 'minding',
 'morals',
 'waist',
 'Birds',
 'feather',
 'flock',
 'Right',
 'mineral',
 'attended',
 'vegetable',
 'imagine',
 'cheap',
 'birthday',
 'Thinking',
 'dig',
 'worried',
 'died',
 'favourite',
 'linked',
 'tremble',
 'thunderstorm',
 'fair',
 'warning',
 'absence',
 'shade',
 'delay',
 'cost',
 'Those',
 'whom',
 'sentenced',
 'company',
 'pardoned',
 'picture',
 'lazy',
 'leaving',
 'rubbed',
 'chuckled',
 'executes',
 'ledge',
 'rock',
 'pitied',
 'hollow',
 'EVEN',
 'occasional',
 'exclamation',
 'Hjckrrh',
 'calmly',
 'sink',
 'Drive',
 'mayn',
 'educations',
 'VE',
 'extras',
 'learned',
 'Certainly',
 'OURS',
 'bill',
 'WASHING',
 'extra',
 'afford',
 'inquired',
 'Reeling',
 'Writhing',
 'Arithmetic',
 'Ambition',
 'Distraction',
 'Derision',
 'lifted',
 'uglifying',
 'beautify',
 'prettier',
 'uglify',
 'simpleton',
 'encouraged',
 'counting',
 ...]

In [8]:
# We will discuss this further and find the most defining words in the document