In [1]:
# Import Dependencies
import nltk
import random
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
# Training corpus: the passage whose character sequences the n-gram table is built from
text = """Global warming, also referred to as climate change, is the observed century-scale rise in the average temperature of the Earth's climate system and its related effects. Multiple lines of scientific evidence show that the climate system is warming. Many of the observed changes since the 1950s are unprecedented in the instrumental temperature record, which extends back to the mid-19th century, and in paleoclimate proxy records of climate change over thousands of years."""

In [3]:
# N-gram order: each gram is a window of n = 6 consecutive characters
# (character-level 6-grams, not trigrams)
n = 6

In [4]:
# Lookup table: n-character gram -> list of the characters seen right after it
ngrams = {}

In [5]:
# Build the n-gram table: every n-character window of `text` maps to the list
# of characters that immediately follow it (duplicates are kept, so the list
# length reflects how often each follower occurs).
# The table is rebuilt from scratch here so that re-running this cell does not
# append a second copy of every follower to lists left over from a prior run.
ngrams = {}
# Stop n characters before the end: the final window has no following character.
for start in range(len(text) - n):
    gram = text[start:start + n]
    # setdefault creates the follower list on first sight of the gram
    ngrams.setdefault(gram, []).append(text[start + n])

In [6]:
# Display the full gram -> followers table built above
ngrams


Out[6]:
{'Global': [' '],
 'lobal ': ['w'],
 'obal w': ['a'],
 'bal wa': ['r'],
 'al war': ['m'],
 'l warm': ['i'],
 ' warmi': ['n', 'n'],
 'warmin': ['g', 'g'],
 'arming': [',', '.'],
 'rming,': [' '],
 'ming, ': ['a'],
 'ing, a': ['l'],
 'ng, al': ['s'],
 'g, als': ['o'],
 ', also': [' '],
 ' also ': ['r'],
 'also r': ['e'],
 'lso re': ['f'],
 'so ref': ['e'],
 'o refe': ['r'],
 ' refer': ['r'],
 'referr': ['e'],
 'eferre': ['d'],
 'ferred': [' '],
 'erred ': ['t'],
 'rred t': ['o'],
 'red to': [' '],
 'ed to ': ['a'],
 'd to a': ['s'],
 ' to as': [' '],
 'to as ': ['c'],
 'o as c': ['l'],
 ' as cl': ['i'],
 'as cli': ['m'],
 's clim': ['a', 'a'],
 ' clima': ['t', 't', 't', 't'],
 'climat': ['e', 'e', 'e', 'e', 'e'],
 'limate': [' ', ' ', ' ', ' ', ' '],
 'imate ': ['c', 's', 's', 'p', 'c'],
 'mate c': ['h', 'h'],
 'ate ch': ['a', 'a'],
 'te cha': ['n', 'n'],
 'e chan': ['g', 'g'],
 ' chang': ['e', 'e', 'e'],
 'change': [',', 's', ' '],
 'hange,': [' '],
 'ange, ': ['i'],
 'nge, i': ['s'],
 'ge, is': [' '],
 'e, is ': ['t'],
 ', is t': ['h'],
 ' is th': ['e'],
 'is the': [' '],
 's the ': ['o'],
 ' the o': ['b', 'b'],
 'the ob': ['s', 's'],
 'he obs': ['e', 'e'],
 'e obse': ['r', 'r'],
 ' obser': ['v', 'v'],
 'observ': ['e', 'e'],
 'bserve': ['d', 'd'],
 'served': [' ', ' '],
 'erved ': ['c', 'c'],
 'rved c': ['e', 'h'],
 'ved ce': ['n'],
 'ed cen': ['t'],
 'd cent': ['u'],
 ' centu': ['r', 'r'],
 'centur': ['y', 'y'],
 'entury': ['-', ','],
 'ntury-': ['s'],
 'tury-s': ['c'],
 'ury-sc': ['a'],
 'ry-sca': ['l'],
 'y-scal': ['e'],
 '-scale': [' '],
 'scale ': ['r'],
 'cale r': ['i'],
 'ale ri': ['s'],
 'le ris': ['e'],
 'e rise': [' '],
 ' rise ': ['i'],
 'rise i': ['n'],
 'ise in': [' '],
 'se in ': ['t'],
 'e in t': ['h'],
 ' in th': ['e', 'e'],
 'in the': [' ', ' '],
 'n the ': ['a', 'i'],
 ' the a': ['v'],
 'the av': ['e'],
 'he ave': ['r'],
 'e aver': ['a'],
 ' avera': ['g'],
 'averag': ['e'],
 'verage': [' '],
 'erage ': ['t'],
 'rage t': ['e'],
 'age te': ['m'],
 'ge tem': ['p'],
 'e temp': ['e'],
 ' tempe': ['r', 'r'],
 'temper': ['a', 'a'],
 'empera': ['t', 't'],
 'mperat': ['u', 'u'],
 'peratu': ['r', 'r'],
 'eratur': ['e', 'e'],
 'rature': [' ', ' '],
 'ature ': ['o', 'r'],
 'ture o': ['f'],
 'ure of': [' '],
 're of ': ['t'],
 'e of t': ['h'],
 ' of th': ['e', 'e'],
 'of the': [' ', ' '],
 'f the ': ['E', 'o'],
 ' the E': ['a'],
 'the Ea': ['r'],
 'he Ear': ['t'],
 'e Eart': ['h'],
 ' Earth': ["'"],
 "Earth'": ['s'],
 "arth's": [' '],
 "rth's ": ['c'],
 "th's c": ['l'],
 "h's cl": ['i'],
 "'s cli": ['m'],
 'mate s': ['y', 'y'],
 'ate sy': ['s', 's'],
 'te sys': ['t', 't'],
 'e syst': ['e', 'e'],
 ' syste': ['m', 'm'],
 'system': [' ', ' '],
 'ystem ': ['a', 'i'],
 'stem a': ['n'],
 'tem an': ['d'],
 'em and': [' '],
 'm and ': ['i'],
 ' and i': ['t', 'n'],
 'and it': ['s'],
 'nd its': [' '],
 'd its ': ['r'],
 ' its r': ['e'],
 'its re': ['l'],
 'ts rel': ['a'],
 's rela': ['t'],
 ' relat': ['e'],
 'relate': ['d'],
 'elated': [' '],
 'lated ': ['e'],
 'ated e': ['f'],
 'ted ef': ['f'],
 'ed eff': ['e'],
 'd effe': ['c'],
 ' effec': ['t'],
 'effect': ['s'],
 'ffects': ['.'],
 'fects.': [' '],
 'ects. ': ['M'],
 'cts. M': ['u'],
 'ts. Mu': ['l'],
 's. Mul': ['t'],
 '. Mult': ['i'],
 ' Multi': ['p'],
 'Multip': ['l'],
 'ultipl': ['e'],
 'ltiple': [' '],
 'tiple ': ['l'],
 'iple l': ['i'],
 'ple li': ['n'],
 'le lin': ['e'],
 'e line': ['s'],
 ' lines': [' '],
 'lines ': ['o'],
 'ines o': ['f'],
 'nes of': [' '],
 'es of ': ['s'],
 's of s': ['c'],
 ' of sc': ['i'],
 'of sci': ['e'],
 'f scie': ['n'],
 ' scien': ['t'],
 'scient': ['i'],
 'cienti': ['f'],
 'ientif': ['i'],
 'entifi': ['c'],
 'ntific': [' '],
 'tific ': ['e'],
 'ific e': ['v'],
 'fic ev': ['i'],
 'ic evi': ['d'],
 'c evid': ['e'],
 ' evide': ['n'],
 'eviden': ['c'],
 'videnc': ['e'],
 'idence': [' '],
 'dence ': ['s'],
 'ence s': ['h'],
 'nce sh': ['o'],
 'ce sho': ['w'],
 'e show': [' '],
 ' show ': ['t'],
 'show t': ['h'],
 'how th': ['a'],
 'ow tha': ['t'],
 'w that': [' '],
 ' that ': ['t'],
 'that t': ['h'],
 'hat th': ['e'],
 'at the': [' '],
 't the ': ['c'],
 ' the c': ['l'],
 'the cl': ['i'],
 'he cli': ['m'],
 'e clim': ['a'],
 'stem i': ['s'],
 'tem is': [' '],
 'em is ': ['w'],
 'm is w': ['a'],
 ' is wa': ['r'],
 'is war': ['m'],
 's warm': ['i'],
 'rming.': [' '],
 'ming. ': ['M'],
 'ing. M': ['a'],
 'ng. Ma': ['n'],
 'g. Man': ['y'],
 '. Many': [' '],
 ' Many ': ['o'],
 'Many o': ['f'],
 'any of': [' '],
 'ny of ': ['t'],
 'y of t': ['h'],
 'ved ch': ['a'],
 'ed cha': ['n'],
 'd chan': ['g'],
 'hanges': [' '],
 'anges ': ['s'],
 'nges s': ['i'],
 'ges si': ['n'],
 'es sin': ['c'],
 's sinc': ['e'],
 ' since': [' '],
 'since ': ['t'],
 'ince t': ['h'],
 'nce th': ['e'],
 'ce the': [' '],
 'e the ': ['1'],
 ' the 1': ['9'],
 'the 19': ['5'],
 'he 195': ['0'],
 'e 1950': ['s'],
 ' 1950s': [' '],
 '1950s ': ['a'],
 '950s a': ['r'],
 '50s ar': ['e'],
 '0s are': [' '],
 's are ': ['u'],
 ' are u': ['n'],
 'are un': ['p'],
 're unp': ['r'],
 'e unpr': ['e'],
 ' unpre': ['c'],
 'unprec': ['e'],
 'nprece': ['d'],
 'preced': ['e'],
 'recede': ['n'],
 'eceden': ['t'],
 'cedent': ['e'],
 'edente': ['d'],
 'dented': [' '],
 'ented ': ['i'],
 'nted i': ['n'],
 'ted in': [' '],
 'ed in ': ['t'],
 'd in t': ['h'],
 ' the i': ['n'],
 'the in': ['s'],
 'he ins': ['t'],
 'e inst': ['r'],
 ' instr': ['u'],
 'instru': ['m'],
 'nstrum': ['e'],
 'strume': ['n'],
 'trumen': ['t'],
 'rument': ['a'],
 'umenta': ['l'],
 'mental': [' '],
 'ental ': ['t'],
 'ntal t': ['e'],
 'tal te': ['m'],
 'al tem': ['p'],
 'l temp': ['e'],
 'ture r': ['e'],
 'ure re': ['c'],
 're rec': ['o'],
 'e reco': ['r'],
 ' recor': ['d', 'd'],
 'record': [',', 's'],
 'ecord,': [' '],
 'cord, ': ['w'],
 'ord, w': ['h'],
 'rd, wh': ['i'],
 'd, whi': ['c'],
 ', whic': ['h'],
 ' which': [' '],
 'which ': ['e'],
 'hich e': ['x'],
 'ich ex': ['t'],
 'ch ext': ['e'],
 'h exte': ['n'],
 ' exten': ['d'],
 'extend': ['s'],
 'xtends': [' '],
 'tends ': ['b'],
 'ends b': ['a'],
 'nds ba': ['c'],
 'ds bac': ['k'],
 's back': [' '],
 ' back ': ['t'],
 'back t': ['o'],
 'ack to': [' '],
 'ck to ': ['t'],
 'k to t': ['h'],
 ' to th': ['e'],
 'to the': [' '],
 'o the ': ['m'],
 ' the m': ['i'],
 'the mi': ['d'],
 'he mid': ['-'],
 'e mid-': ['1'],
 ' mid-1': ['9'],
 'mid-19': ['t'],
 'id-19t': ['h'],
 'd-19th': [' '],
 '-19th ': ['c'],
 '19th c': ['e'],
 '9th ce': ['n'],
 'th cen': ['t'],
 'h cent': ['u'],
 'ntury,': [' '],
 'tury, ': ['a'],
 'ury, a': ['n'],
 'ry, an': ['d'],
 'y, and': [' '],
 ', and ': ['i'],
 'and in': [' '],
 'nd in ': ['p'],
 'd in p': ['a'],
 ' in pa': ['l'],
 'in pal': ['e'],
 'n pale': ['o'],
 ' paleo': ['c'],
 'paleoc': ['l'],
 'aleocl': ['i'],
 'leocli': ['m'],
 'eoclim': ['a'],
 'oclima': ['t'],
 'mate p': ['r'],
 'ate pr': ['o'],
 'te pro': ['x'],
 'e prox': ['y'],
 ' proxy': [' '],
 'proxy ': ['r'],
 'roxy r': ['e'],
 'oxy re': ['c'],
 'xy rec': ['o'],
 'y reco': ['r'],
 'ecords': [' '],
 'cords ': ['o'],
 'ords o': ['f'],
 'rds of': [' '],
 'ds of ': ['c', 'y'],
 's of c': ['l'],
 ' of cl': ['i'],
 'of cli': ['m'],
 'f clim': ['a'],
 'hange ': ['o'],
 'ange o': ['v'],
 'nge ov': ['e'],
 'ge ove': ['r'],
 'e over': [' '],
 ' over ': ['t'],
 'over t': ['h'],
 'ver th': ['o'],
 'er tho': ['u'],
 'r thou': ['s'],
 ' thous': ['a'],
 'thousa': ['n'],
 'housan': ['d'],
 'ousand': ['s'],
 'usands': [' '],
 'sands ': ['o'],
 'ands o': ['f'],
 'nds of': [' '],
 's of y': ['e'],
 ' of ye': ['a'],
 'of yea': ['r'],
 'f year': ['s'],
 ' years': ['.']}

In [7]:
# Try the model out: seed the generator with the first n characters of the corpus
current_gram = text[:n]

In [8]:
# The generated text starts as the seed gram; the generation loop appends to it
result = current_gram

In [9]:
# Generate text one character at a time by walking the n-gram table.
# NOTE: the loop continues from the current values of `result` and
# `current_gram`; re-run the cells that assign them before re-running this
# cell if the sample should start again from the seed.
# The pick below is random and no seed is set in the visible cells, so the
# printed text differs between runs.
MAX_NEW_CHARS = 200  # upper bound on the number of characters appended
for _ in range(MAX_NEW_CHARS):
    # Stop early when the current gram was never recorded with a follower
    if current_gram not in ngrams:
        break
    # Follower lists keep duplicates, so a uniform choice is frequency-weighted
    result += random.choice(ngrams[current_gram])
    # Slide the window: the next state is the last n characters of the output
    current_gram = result[len(result) - n:]
print(result)


Global warming, also referred to as climate changes since the 1950s are unprecedented in the instrumental temperature of the Earth's climate system and its related effects. Multiple lines of scientific evid