In [10]:
from collections import Counter
from itertools import chain

import lda
import lda.datasets
import numpy as np
import pandas as pd

In [2]:
# Load the Reuters sample data shipped with the `lda` package in one step.
# Presumably X is the document-term count matrix, vocab the word list and
# titles the per-document titles -- TODO confirm against the lda docs.
# NOTE(review): none of the three is referenced by the visible cells below.
X, vocab, titles = (
    lda.datasets.load_reuters(),
    lda.datasets.load_reuters_vocab(),
    lda.datasets.load_reuters_titles(),
)

In [11]:
# Headlines file, resolved relative to the notebook's working directory.
HEADLINES_PATH = 'Headlines_1956.txt'

# Parse the file into a DataFrame; a later cell reads its `Headline` column.
df = pd.read_table(HEADLINES_PATH)

In [22]:
# Discard every row that has a missing value in any column, so the tokenising
# cell below only sees rows with no NaN in `Headline`.
# The keyword arguments spell out the pandas defaults (row-wise, drop on any NaN).
df = df.dropna(axis=0, how='any')

In [24]:
# Small hand-picked set of function words to ignore.
stoplist = set('for a of the and to in'.split())

# Tokenise each headline: lower-case it, split on whitespace, and keep only the
# tokens that are not stop words. Punctuation stays attached to its token.
texts = []
for headline in df.Headline:
    kept_tokens = [token for token in headline.lower().split()
                   if token not in stoplist]
    texts.append(kept_tokens)

In [26]:
# Remove tokens that occur exactly once in the whole corpus.
#
# Flatten with chain.from_iterable instead of sum(texts, []), which re-copies
# the growing accumulator list for every headline. Count every token in a
# single Counter pass instead of calling all_tokens.count(word) once per
# distinct word, which rescans the full token list each time.
# The resulting all_tokens, tokens_once and texts are the same as before.
#
# NOTE: this cell overwrites `texts`, so running it a second time removes the
# tokens that became singletons after the first pass.
all_tokens = list(chain.from_iterable(texts))
token_counts = Counter(all_tokens)
tokens_once = set(word for word, count in token_counts.items() if count == 1)
texts = [[word for word in text if word not in tokens_once]
         for text in texts]

In [27]:
# Show the filtered token lists (one inner list per headline; headlines whose
# tokens were all removed appear as []).
# NOTE(review): this dumps a very large repr into the notebook; consider
# displaying a slice such as texts[:20] instead.
texts


Out[27]:
[['year', 'peace'],
 ['marries'],
 ['they', 'share', '$5,000'],
 ['border', 'clash', 'at', 'gaza'],
 ['3,000', 'cheer', 'tengku', 'at', 'ship'],
 ['merdeka', 'trip:', 'all', 'is', 'set'],
 ['rubber', 'shares', 'boom'],
 ['mr.', 'x', 'again'],
 ['all-out', 'assault', 'on', 'ready'],
 ['50', 'much', 'is', 'at', 'john'],
 ['cleared'],
 ['found', 'rally', 'tough'],
 [],
 ['3', 'malayans'],
 ['awards', 'are', 'so', 'very', 'paper', 'said'],
 [],
 ['tengku', 'secret', 'fear'],
 ['answer', 'this', 'question', 'win'],
 ['dog', 'saves', 'four'],
 ['3', 'bandits', 'give', 'up', 'on', 'road'],
 ['shot', 'no.', '1', 'by', 'draw'],
 ['jolly', 'it', "wasn't"],
 ['macd', 'drops', 'again'],
 ['gunman', 'kills', 'from', 'car'],
 ['mr.', 'lee', 'get', 'that', 'mca', 'seat'],
 ['ipoh', 'men', 'rally'],
 ['win', 'prize'],
 ['300', 'at', 'talks'],
 ['hungry', 'reds'],
 ['flood', 'on', 'road'],
 [],
 ['is', '73'],
 ['dies'],
 ['singapore',
  'holiday',
  '60',
  'dash',
  'through',
  'bandit',
  'they',
  'hit',
  'once'],
 ['5', 'escape'],
 ['two', 'reds', 'killed', 'selangor'],
 ['day', 'or', 'wide', "strike'"],
 ['wife', 'found', 'with', 'head'],
 ['$75,000', 'opium'],
 ['haul', 'on', 'ship'],
 ['buses:', 'meet', 'again'],
 ['marshall', 'ill'],
 ['15', 'million', 'times'],
 ['girl',
  'from',
  'has',
  'j',
  'johore',
  'bahru',
  'malay',
  'leaders',
  'are',
  '"'],
 ['colony', 'bans', 'entry', 'bandits'],
 ['shot', 'sea', 'drama'],
 ['workers', 'on', 'strike'],
 ['drop', 'gold'],
 ['weds', 'tengku', 'tonight'],
 ['this', "man'"],
 ['naval', 'base', 'switch'],
 ['girl', 'found', 'man', 'charged'],
 ['sa', 'ved', 'by'],
 ['$100,000', 'pay-out', 'probe'],
 ['prince', 'takes', 'bride'],
 ['miss', 'now', 'princess'],
 ['tengku', '11'],
 ['two', 'ceylon', 'tengku'],
 ['red', 'killed', 'ambush'],
 ['talk', 'on', 'asia'],
 ['by', 'air', 'see', 'son'],
 ['floods', 'on', 'traffic', 'moves'],
 ['25', 'get', 'demands'],
 ['enter', 'united', 'liberal', 'party'],
 ['dog', 'kills', 'man'],
 ['two', 'boys', 'found', 'drowned'],
 ['dies', 'car'],
 ['10,000'],
 ['15', 'share', '$5,000', 'prize'],
 ['homes', 'thousands'],
 ['death', 'dance', 'is', 'just', 'movie'],
 ['cost', 'new', 'pay', 'claim'],
 ['ration', 'cards', 'johore'],
 ['died', 'on'],
 ['bride'],
 ['buses:', 'new', 'offer'],
 ['schoolboy', 'stabbed'],
 ['marshall', "'a", 'very', "man'"],
 ['cards', 'is', 'all', 'right', 'now'],
 ['10,000', 'are', 'set', 'strike'],
 ['mr.', 'm.', 'back', 'at', 'work'],
 ['3', 'quit', 'chiang'],
 ['this', 'blaze', 'made', 'homeless'],
 ['family', 'distress', 'j'],
 ['town', 'is', 'destroyed', '90', 'minutes'],
 ['search', 'baby'],
 ['merdeka:'],
 ['chin', 'peng:', 'hunt', 'on', 'again'],
 ['reds', 'told:', 'give', 'up', 'or', 'die'],
 ['nurses:', 'big', 'march'],
 ['just', 'mr.', 'm', 'gift', 'fit'],
 ["malaya's", 'boom'],
 ['when', 'is', 'soldier'],
 ['no', 'return', 'from', 'china'],
 ['singapore', 'estate'],
 ['a-plane'],
 ['bosses', 'meet', 'on', 'pay', 'claims'],
 [],
 ['carriers', 'will', 'visit', "s'pore"],
 ['is', 'there', 'rebel', 'eleven', 'said', 'yes', 'protest', 'out'],
 ['union', 'chief', '\xe2\x80\x93', 'says:', 'we', 'back', 'mr.'],
 ['march', 'becomes', 'march'],
 ['march', 'down'],
 ['one', 'ruler', 'all'],
 ['shots', 'fired', 'street', 'chase'],
 ['petaling', 'strike'],
 ['first', 'at'],
 ['$20,000:', 'four', 'days', 'left'],
 ['not', 'babies'],
 ['moscow', 'view'],
 ['floods', 'on', 'east', 'coast'],
 ['marshall', 'mission'],
 ['may', 'go', 'april'],
 ['curfew', 'lifted'],
 ['but', 'no', 'wedding'],
 ['there', 'has', 'been', 'too', 'much', 'we', 'must', 'have'],
 ['$20,000'],
 ['suicide', 'victim', 'father'],
 ['$3', 'mil.', 'blaze'],
 ['threat'],
 ['bandit'],
 ['poison', 'victim'],
 ['new', 'coming'],
 ['britons', 'die'],
 ['petaling', 'tin:', 'strike', 'today'],
 ['girl', '16', 'attacked'],
 ['promise', 'was'],
 ['police', 'swoop', 'on', 'gang', 'suspects'],
 ['thugs', 'politics', 'shock'],
 ['two', 'bandits', 'killed'],
 ['mahmud', 'has', "'pay", 'at', 'door'],
 ['3', 'tengku'],
 ['girl', 'fights', 'her', 'mother'],
 ['marshall', 'is', 'ill', 'again'],
 ['midnight'],
 ['wrong', 'but', "they're", 'so', 'glad'],
 ['$20,000'],
 ['pirates', '3', 'men', 'missing'],
 ["tengku's", 'success', 'is', 'sure'],
 ['every', 'new'],
 ['bullets', 'over', 'border'],
 ['malayan', 'judge', 'dies'],
 ['four', 'saved', 'after', '9', 'hours', 'sea'],
 ['strikes', 'again'],
 ['young', 'british', 'officer', 'shot', 'by', 'bandits'],
 ['10', 'missing'],
 ['jet', 'drama'],
 ['mr.', 'joins', 'alliance', 'party'],
 ['bus', 'court', 'men', 'visit'],
 ['big', 'search'],
 ['semenyih:'],
 ['another', 'row', 'at', 'hospital'],
 ['3', 'people'],
 ['i', 'from', 'one', 'bride', 'another'],
 ['rule', 'by', 'democracy', 'or', 'future', 'is', 'up', "you'"],
 ['bid', 'easy'],
 ['train', 'crash', 'kills', '15'],
 ['ultimatum', 'by', '4,000'],
 ['die'],
 ['tuc', 'pay', 'plan'],
 ["'let", 'us', 'all', 'be'],
 ['terror', 'town', 'over', 'call', 'love'],
 ['may', 'force', 'him', 'leave', 'school'],
 ['red', 'leader', 'three', 'men', 'killed'],
 ['4', 'held', 'after', 'guns', 'vanish'],
 ['wide', 'strike', 'threat'],
 ['surprise'],
 ['wait', 'you'],
 ['naval', 'base', 'men', 'strike'],
 ['ike', 'decide'],
 ['now', "it's", 'osman'],
 ['another', 'm.p.', 'on', 'merdeka'],
 [],
 ['scholars', 'get', 'together'],
 ['slain', 'girl:', '3', 'accused'],
 ['tengku', 'late', 'war'],
 ['ambush', 'reds', 'one', 'dies'],
 ['almost', 'another', 'disaster'],
 ['two', 'more', 'give', 'up', 'johore'],
 ['merdeka'],
 ['wage', '320,000'],
 ['seamen', 'unite'],
 ['marshall', 'hospital'],
 ['tengku', "it's", 'big', 'surprise'],
 ['they', 'hope', 'wed', 'johore', 'bahru'],
 ['at'],
 ['gang', 'off', 'haul'],
 ['dock', 'strike', 'goes', 'on'],
 ['rahman', 'made', 'them', 'look', 'up'],
 ['question', 'that', 'answer'],
 ['arbitration'],
 ['big', 'exodus'],
 ['marshall', 'is', 'doing', 'fine'],
 ['police', 'shock'],
 ['child', 'starts', 'fire'],
 [],
 ['shell', 'tapper'],
 ['big', 'opium', 'haul', 'from', 'ship'],
 ['new', 'polio', 'drug'],
 ['ghost'],
 ['tiger', 'killed', 'by', 'guard'],
 ['boy', 'they', "didn't", 'want'],
 ['mm', 'i', 'is', 'now', 'on', 'sale', 'get', 'your', 'copy', 'today'],
 ['closed', 'door', 'talks', 'on', 'finance'],
 ['anthem:', 'new', 'shocks'],
 ['over', 'yes', 'no', 'mr.', 'yap', 'quits'],
 ['car', 'turns', 'river'],
 ['crash', 'kills', '22'],
 ['two', 'reds', 'killed'],
 ['girl', 'drowned'],
 ['wage', 'claim', 'surprise'],
 ['by', 'jet', 'pilot'],
 ['merdeka', 'talks', 'top'],
 ['hong', 'kong', 'plan', 'starts', 'storm'],
 ['merdeka:', 'british', 'agree', 'everything'],
 ["it's"],
 ['500', 'quit', 'before', 'half', 'time'],
 [],
 ['city', 'council', 'faces', 'cinema', 'ban', 'writ'],
 ['more'],
 ['bride'],
 ['six', 'knifed', 'naval', 'base'],
 ['buddhist'],
 ['girl,', '3,', 'killed', 'by', 'tree'],
 ['move', 'bar', 'marriage', 'among', 'royalty'],
 ['dollars'],
 ['two', 'share', '$5,000'],
 ['seato', 'talks', 'agreement'],
 ['spies', 'watch', 'strikers'],
 ['they', 'ask', 'ban', 'on', 'sir'],
 [],
 ['tax', 'gets'],
 ['reds', 'told:', 'only', '17', 'days', 'left', 'give', 'up'],
 ['on', 'malacca'],
 ['market', 'fire'],
 ['nude', 'gets', 'o.k.'],
 ['no,', 'says', 'singapore'],
 ['tengku', 'gets', 'all', 'he', 'wants'],
 ['tears', 'goodbye'],
 ['first', 'prize', 'plan'],
 ['kidnap', 'gang', 'smashed'],
 ['air', 'exercise', 'ends'],
 ['7'],
 ['30', 'grenades', 'missing'],
 ["'end", "strike'", 'move', 'at', 'base'],
 ['rubber:', 'big', 'riddle'],
 ['de', 'cruz', "i'll", 'take', 'job'],
 ['double', 'death'],
 ['bandits'],
 ['slaying'],
 ['fatal', 'collapse'],
 ['sir', 'anthony', 'greets', 'tengku'],
 ['girl', 'needle'],
 ['russian', 'premier', 'resigns'],
 ['new', 'bid', 'end', 'strike', 'by', '9,000'],
 ['merdeka', 'gift', 'sought'],
 ['big', 'hunt', 'after', 'inspector', 'shot', 'down'],
 ['towkay', 'robbed', 'on', 'way', 'bank'],
 ['hurt'],
 ['will', 'be', 'june', 'bride'],
 ['dock', 'strike', 'hits'],
 ['car', 'gang', 'two', 'men'],
 ['on', 'politics'],
 ['bandits', 'kill', 'official'],
 ['naval', 'base', 'warning'],
 ['five', 'seamen'],
 ['record'],
 ['rubber', 'ban'],
 ['is'],
 ['charge', 'inquiry', 'opens'],
 ['five', 'jolly', 'ponder', 'over', 'that', 'trip', "s'pore"],
 ['union', 'will', 'strike', 'next', 'week'],
 [],
 ['talks'],
 ['job'],
 ['$3', 'million', 'pay', 'rise'],
 ['mr.', 'm', 'calls', 'off', 'his', 'india', 'trip'],
 ['10', 'at', 'festival'],
 ['floods', 'halt', 'traffic'],
 ['with', 'two'],
 ['five', 'men', 'will', 'draw', 'up', 'merdeka'],
 ['plane', 'shot', 'at'],
 ['no', 'talks', 'or'],
 ['plane', 'land'],
 ['18,000', 'miners', 'may', 'strike'],
 ['marshall', 'is', 'back', 'with', 'promise'],
 ['new', 'plant', 'at'],
 ['three', 'share'],
 ['china'],
 ['flood', 'can', 'be', 'fun', "you're", 'young'],
 ['day', 'festival'],
 ['seaman', 'injured'],
 ['floods', 'go'],
 ['first', 'death', 'floods'],
 ["'be"],
 ['daring', 'swoop'],
 ['lloyd', 'leave'],
 ['by', 'colony'],
 ['mr.', 'm.', 'is', 'leaders', 'say'],
 ['mm', 'rumi', 'is', 'out', 'tomorrow'],
 ['tengku', 'take', 'charge', 'defence'],
 ['first', 'asian', 'dies', 'at', '80'],
 ['lose', '$100,000', 'floods'],
 ['all', 'troops', "'must", "go'"],
 ["front's", 'london'],
 ['merdeka:', 'two'],
 ['marshall', 'says:', 'i', 'may', 'resign'],
 ['robbers', 'stab', 'woman'],
 ['butchers', 'want', 'more', 'pay'],
 ['floods', 'halt', 'dash', 'by'],
 ['wife', 'where', 'am'],
 ['death', 'hits', 'twice', 'same', 'way'],
 ['tapper', 'hurt', 'after', 'shot', 'at', 'food', 'check', 'point'],
 ['rumi', 'success'],
 ['new', 'bid', 'end', 's.t.c.', 'strike'],
 ['dies'],
 ['police', 'arrest', 'strikers'],
 ['colony', 'grenade', 'terror'],
 ['that', 'will', 'not', 'stop', 'me'],
 ['two', 'cents', 'more', 'today', 'malaya'],
 ['from', 'colony'],
 ['more', 'bag', 'as', 'run', 'low'],
 ['floods'],
 ['tengku', 'his', 'mind', 'now'],
 ['10', 'houses'],
 ['27', 'hurt', 'flash', 'lightning', 'hits', 'school'],
 ['tengku', 'has', 'secret', 'date'],
 ['will', 'act', 'on', 'own', 'china', 'trade', 'embargo'],
 ['boy', 'drowns', 'pool'],
 ['student', 'held'],
 ['all', 'join', 'attack'],
 ['naval', 'base', 'strike', 'off'],
 ['all', 'quiet', 'after', 'bomb', 'terror'],
 ["'rebels'", 'may', 'be', 'expelled'],
 ['london', '\xe2\x80\x94', 'then', 'sun'],
 ['dies'],
 ['floods'],
 ['planter', 'dies', 'fighting'],
 ['tin', 'drops'],
 ['jakarta'],
 ['special', 'jobs', 'body', 'police'],
 ['blazing', 'boat', 'adrift', 'on', 'river'],
 ['grenade', 'probe'],
 ['tin:', 'drop'],
 ['union', 'ban', 'students'],
 ['de', 'cruz', 'cables', 'denial'],
 ['threat', 'us,', 'say', 'workers'],
 ['queen', 'two'],
 ['who', 'was', 'too', '1'],
 ['cost'],
 ['air', 'record'],
 ['ban', 'on', 'marilyn'],
 ['donald'],
 ['talks'],
 ['50', 'red', 'held', 'at', 'dawn'],
 ['more', 'aid', 'malaya'],
 ['colony', 'man', 'gets', 'damages'],
 ['days', 'left...'],
 ['army', 'taken', 'on', 'gun', 'chase'],
 ['hornets', 'sting'],
 ['three', 'die', 'smash'],
 ['victim'],
 ['rubber', 'china'],
 [],
 ['drowned', 'by', 'lightning'],
 ['johore', 'roads', 'flooded'],
 ['10', 'we', 'gave', 'help', 'reds'],
 ['long,', 'long', 'list'],
 ['stand', 'by'],
 ['surprise', 'pledge', 'by', 'pap'],
 ['merdeka:', 'surprise', 'by', 'marshall'],
 ['wins', 'first', 'prize'],
 ['help', 'varsity', 'by', 'black'],
 ['four', 'share', '$5,000'],
 ['500', 'at', 'ceylon'],
 [],
 ["it's", 'thanks', 'tengku'],
 ['girl', 'year'],
 ['new', 'ban', 'on', 'travel'],
 ['pakistan', 'press'],
 ['umno', 'out', 'two', 'singapore', 'rebels'],
 ['sir', "'the", 'man', 'with', 'mind', 'at', 'his'],
 ['osman', 'misses', 'classes'],
 ['7', 'more'],
 ['so'],
 ['15,000', 'at', 'big', 'anti-red', 'rally'],
 ['tengku', 'triumphs'],
 ['meet', 'me', 'again'],
 ['murdered'],
 ['two', 'bid'],
 ['secret', 'probe'],
 ['tan', 'liberal'],
 ['$20,000', 'post', 'now'],
 ['double', 'rescue', 'by'],
 ["s'pore", 'merdeka', 'test'],
 ['suicide', 'house'],
 ['week', 'planned', 'clerks'],
 ['city', 'council', 'will', 'have'],
 [],
 ['three', 'road', 'deaths', 'one', 'day'],
 ['is'],
 ['thugs', 'beat', 'up'],
 ['then', 'bullet'],
 ['$20,000'],
 ['not'],
 ['hunger', 'strike', 'decision', 'today'],
 ['he', 'reds'],
 ['big', 'land', 'freeze'],
 ['merdeka', '1957'],
 ['last', 'surrender', 'chance', 'reds', 'today'],
 ['first', 'man', 'home'],
 ['its', 'sweet'],
 ['schoolgirl', 'robbed', 'murdered'],
 ['4', 'days', 'left...'],
 ['bank', 'clerk', 'missing'],
 ['colony'],
 ['war', 'on', 'from', '200', 'yards'],
 ['school', 'is', 'closed'],
 ['cracker', 'boys', 'go', 'on', 'spree'],
 ['rubber', 'fraud'],
 ['straits', 'times.'],
 ['months', 'm-day'],
 ['alliance', 'is', 'ready', 'take', 'over', '3', 'months'],
 ['chin', 'peng', 'now', 'keep', 'his'],
 ['last', 'surrender'],
 ["tengku's"],
 ["tengku's", 'victory', 'salute'],
 ['cable'],
 ['key', 'job'],
 ['hunt', 'detective'],
 ['red', 'outrage'],
 ['woman', 'beats'],
 ['70', 'held', 'swoop', 'on', 'new'],
 ['hospital', 'tour'],
 ['like', '$75,000', 'long'],
 ['lee', 'view'],
 ['thugs', 'hold', 'up', 'towkay'],
 ['riddle', '4'],
 ['dag', 'an', 'hour'],
 ['tengku'],
 ['missing', 'c.i.d.', 'man', 'found', 'liner'],
 ['greetings', 'from', 'marshall'],
 ['lottery:', 'surprise'],
 ['bombs', 'on', 'red'],
 ['r.a.f.', 'ang'],
 ['singapore', 'it', 'is'],
 ['dawn', 'swoop', 'guns', 'by', '300', 'police'],
 ['found'],
 ['face', 'murder', 'bid', 'charge'],
 ['two', 'rest', 'mr.', 'm'],
 ['reds', 'move'],
 ['i', 'year?', 'that', 'should', 'be', 'says'],
 ['12', 'ang'],
 ['shots', 'reds', 'flee'],
 ['peace', 'comes', 'back', '10', 'terror', 'town'],
 ['u.n.', 'officials', 'visit'],
 ['c.', '\xe2\x80\x94', 'court', 'urges', 'big'],
 ['bandit', 'killed'],
 ['swiss', 'holiday'],
 ['car', 'fired'],
 ['armed', 'robbers', 'take', 'shop', 'man', 'on'],
 ['royal'],
 ['reds', 'will', 'not', 'give', 'up', 'arms'],
 ['then', 'rest', "it's", 'm.', 'way', 'j'],
 ['s.t.c.', 'men', 'say', 'yes', 'award'],
 ['new', 'bid', 'split', 'britain', 'u.s.'],
 ['flee', 'cracker', 'crowd'],
 ['quiet', 'jungle'],
 ['tengku:', 'we', 'stay', 'area'],
 ['two', 'stabbed', 'street'],
 ['$25,000', 'damage', 'colony', 'fire'],
 ['stop', 'mr.', 'm.', 'tells', 'europeans'],
 ['defence', 'switch', 'be', 'rushed'],
 ['marshall', 'warns', 's.t.c.'],
 ['mission', 'is', 'home', 'men'],
 ['changes', 'cabinet'],
 ['new'],
 ['students', 'drown'],
 ['2', 'terror', 'gangs', 'ambush', 'police'],
 ['boy,', 'aged', 'drowns', 'swim', 'pool'],
 ['three', 'killed', 'explosion'],
 ['french', 'drop', 'out'],
 ['train', 'upsets', 'holiday', 'service'],
 ['biggest', 'plan'],
 ['terror', 'clean-up', 'under', 'way'],
 ['royal', 'bride', 'gives', 'lunch'],
 ['fijians', 'kill', 'five', 'reds'],
 ['four', 'bandits', 'flee', 'under', 'gurkha', 'fire'],
 ['c.i.d.', 'man', 'is', 'back', "away'"],
 ['stc', 'union', 'agrees'],
 ['all', 'love', 'dog'],
 ['man', 'lost', 'on', 'way', 'home', 'from', 'malaya'],
 ['assembly', 'clerk', 'wants', 'quit'],
 ['100', 'soldiers', 'riot', 'at', 'police', 'post'],
 ['girl,', '3', 'men', 'hurt', 'road', 'crash'],
 ['dock', 'strike', 'end'],
 ['on', 'with', 'strike'],
 ['lift', 'opium', 'tengku'],
 ['big', 'opium', 'row', 'begins'],
 ['right', 'touch'],
 ['six', 'die', 'red', 'trap'],
 ['this', 'is', 'what', 'tengku', 'wants'],
 ['that', "wasn't", 'so', 'quiet'],
 ['stc', 'buses', 'back', 'on', 'roads'],
 ['dog', 'is', 'jailed'],
 ['guest'],
 [],
 ['meets', 'queen'],
 ['boat', 'sinks', 'with', '4', 'men'],
 ['big', 'hunt'],
 ['long,', 'long', 'strike', 'is', 'is', 'happy'],
 ['buses', 'back', 'with', 'big', 'bang'],
 ['all'],
 ['2', 'reds', 'killed'],
 ['malaya'],
 ['labour', 'minister', 'hits', 'out', 'at', 'strikers'],
 ['today'],
 ['valetta', 'hurt'],
 ['a-plane', 'air'],
 ['buses'],
 ['100'],
 ['inspector', 'shot', 'by', 'accident'],
 ['red', 'chinese', 'mass', 'troops'],
 ['change', 'fire'],
 ['130', 'tons'],
 ['nude', 'body', 'near', 'youth', 'accused'],
 ['mr.', 'x', 'wins', '$15,000', 'big', 'contest'],
 ['two', 'share', '$5,000'],
 ['all', 'quiet', 'jungle'],
 ['english', 'dies'],
 ['thugs', 'with', 'guns'],
 ['4', 'hurt'],
 ['is', 'with'],
 ["queen's", 'chinese', 'told'],
 ['labour', 'plans'],
 ['girl', 'hurt', 'by'],
 ['new', 'training', 'is', 'opened'],
 ['tengku', 'welcome', 'will', 'be', 'day', 'late'],
 ['tengku', 'back', 'triumph'],
 ['i', 'waiting'],
 ['we', 'must', 'keep', 'up', 'strength'],
 ['emergency', 'still', 'big', 'worry'],
 ['leader', 'gets', 'death', 'letter'],
 ['midnight'],
 ['cyclist', 'hurt', 'road', 'crash'],
 ['plan', 'unity', 'on', 'may', 'day'],
 ['m-day', "'if", 'god'],
 ['hope'],
 ['broadcast', 'chin', 'peng', 'today?'],
 ['merdeka', 'holiday'],
 ['patrol', 'kills', '3', 'reds'],
 [],
 ['people', 'decide'],
 ['over', 'merdeka', 'all'],
 ['opium'],
 [],
 ['new', "'chin", "peng'", 'letter', 'says', 'tengku'],
 ['woman', 'found', 'on', 'bed', 'police', 'question'],
 ['blast', 'johore'],
 ['reds', 'killed'],
 ['singer', 'hurt', 'crash'],
 ['top', 'job'],
 ['2', 'more', 'killed'],
 [],
 ['rubber', 'drops', 'dollar'],
 ['bombs', 'kill', '13', 'bandits'],
 ['boy,', '12,', 'is', 'knifed'],
 ['chin', 'peng', 'told:', 'your', 'promise'],
 ["i'm", 'sure', 'malayan', 'chinese', 'support'],
 ['quemoy', 'shelled'],
 ['final', 'count'],
 ['exam'],
 ['not', 'blame'],
 ['judge', 'find', 'out'],
 ['film'],
 ['kill', 'mahjong', 'player'],
 ['ministers', 'meet'],
 ['increase', 'bus', 'fares', 'rejected'],
 ['women', 'defy', 'bandits'],
 ['on', 'new', 'raid', 'rock', 'capital'],
 ['bourne', 'cables', 'air', 'chief'],
 ['troops', 'race', 'with', 'time'],
 ['fijians', 'win'],
 ['starts', '16', 'homeless'],
 ['writ', 'against', 'mr.', 'lim'],
 ["s'pore"],
 ['knifed', 'by', 'gang'],
 ['raid', 'by', 'reds'],
 ['or'],
 ['airman', 'jet', 'engine', 'saved'],
 ['canal', 'rescue'],
 ['i', 'freedom'],
 ['split', 'on', 'jobs', 'report'],
 ['more', 'japanese'],
 ['stabbed', 'gang', 'fight'],
 ['another', 'blitz', 'on', 'red'],
 ['mr.', 'm:', 'return', 'put', 'back'],
 ['dies'],
 ['its', 'merdeka'],
 ['big', 'sun', 'blast'],
 ['two', 'each'],
 ['reds', 'batter', 'father', 'as', 'on'],
 ['malayan', 'dies', 'london'],
 ['kiss', 'sir'],
 ['up,', 'up,', 'tin', 'again'],
 ['slap', 'face', 'at', 'big', 'dinner'],
 ['red', 'china', 'hold', 'fashion', 'change', 'but', 'never'],
 ['down', 'but', 'still', 'up'],
 ['black', 'police', 'arrested'],
 ['borneo', 'tour', 'canon'],
 ['father', 'please', "don't", 'jail'],
 ['steel', 'plant', 'india'],
 ['beat', "reds'", 'call'],
 ["there's", 'plane'],
 ['inspector', 'found', 'shot', 'dead'],
 ['hold', 'up', 'traffic'],
 ['reds', 'kill', 'woman'],
 ['what', "doesn't"],
 ['miners', 'walk', 'out'],
 [],
 ['wage', 'boost'],
 ['american', '100', 'ft.', 'death', 'fall'],
 ['player', 'killed'],
 ['soccer', 'dies', 'at', 'first', 'match'],
 ['sir', 'quit'],
 ['11', 'reds'],
 ['u.s.', 'accused'],
 ['boy,', 'takes', 'train', 'ride', 'k.l.', '40', 'cents'],
 ['big', 'loss', 'colony'],
 ["'if", 'we', 'can', 'chin'],
 ['shot', 'fired', 'street'],
 ['big', 'war', 'games', 'begin', 'march'],
 ['aussies', 'merdeka', 'plan'],
 ['$20,000', 'mail', 'today'],
 ['mr.', 'quits'],
 ['visitors'],
 ['jap'],
 ['cabinet'],
 ['colony', 'scare'],
 ['union'],
 ['airport'],
 ['15', 'lost', 'sea'],
 ['or'],
 ['no', 'more', 'night', 'life', 'his'],
 ['two', 'more', 'reds', 'killed'],
 ['casey', 'ceylon'],
 ['red', 'who', 'gives', 'up', 'now'],
 ['all', 'aboard', 'china'],
 ['rulers', 'say', 'yes', 'merdeka', 'agreement'],
 ['miners', 'want', 'talks'],
 ['crisis', 'next', 'm'],
 ['tengku:', 'i', "don't", 'love'],
 ['tengku', 'takes', 'over', 'defence', 'today'],
 ['320,000', 'say:', 'we', 'want', 'new', 'pay', 'pact'],
 ['kidnapped', 'wife', 'found'],
 ['fijians', 'kill', '3'],
 ['year', 'babies'],
 ['up', 'go', '18', 'houses'],
 ['singapore',
  'steals',
  'love',
  '\xe2\x80\x94',
  'family',
  'sails',
  'but',
  'he',
  'stays'],
 ['united', 'front', 'v.', 'reds'],
 ['power', 'tengku'],
 ["'no'", '$4', 'demand'],
 ['once', 'more'],
 ['ike'],
 ['leaves'],
 ["'don't", 'lift', 'opium', 'tan'],
 ['troops', 'quit'],
 ['higher', 'stc', 'fares'],
 ['talks'],
 ['unions', 'colony', 'agree', 'at', 'last'],
 ['challenge'],
 ['two', 'die', 'as', 'cars', 'collide'],
 ['no', 'stc'],
 ['mil.', 'probe', 'demanded'],
 ['drops'],
 ['best'],
 ['poison', 'death', 'was', 'an', 'accident'],
 ['on', 'plane', 'wreck'],
 ['love', 'marriage'],
 ['then', 'new', 'marshall', 'plan'],
 ['$5,000', 'two', 'share'],
 ['go', 'gay'],
 ['call', 'has', 'been'],
 ['80'],
 ['waiting', 'his', 'parents'],
 ['bride'],
 ['canon'],
 ['seven', 'days', 'left', 'you', 'win', '$20,000'],
 ['police', 'ban', 'singapore', "it's"],
 ['students', 'strike'],
 ['hidden', 'bombs', 'alarm'],
 ['first', 'president'],
 ['only', 'six', 'days', 'go'],
 ['round', 'world', 'on'],
 ["'i", 'am', 'not'],
 ['over', 'he', 'twice'],
 ['feaf', 'job'],
 ['alliance', 'is', 'says', 'pap', 'boss'],
 ['mr.', 'm', 'rest', 'at', 'front', 'meeting'],
 ['gurkhas', 'bag', 'bandit'],
 ['merger', 'plan', 'out'],
 ['lib-socs', 'hit', 'back:'],
 ['big', 'hunt', '3'],
 ['soldiers', 'end', 'danger'],
 ['aussie', 'sergeant', 'dies', 'clash'],
 ['less', 'war', 'risk'],
 ['troops', 'find', 'it', 'jungle'],
 ['only', 'five', 'days', 'go'],
 ['cabinet', 'talks'],
 ['jakarta', 'go', 'back', 'on'],
 ['case'],
 ['gas', 'hold', 'kills', '2'],
 ['two', "years'"],
 ['what', "they'll", 'get', 'when'],
 ['right', 'man', 'job'],
 ['an', 'anthem:', '$5,000'],
 ['$20,000', 'only', 'four', 'more', 'days', 'left'],
 ['mr.', 'm', 'names', 'date'],
 ['take', 'over'],
 ['war', 'on', 'tax'],
 ['down', 'but', 'not', 'out'],
 ['change'],
 ['lee', 'hits', 'out', 'at', 'merdeka'],
 ['search', 'ends'],
 ['test', 'air'],
 ['on', 'way'],
 ['gas', 'kills', '14'],
 ['drives', 'meet', 'his', 'bride'],
 ['$20,000:', '3', 'days', 'left'],
 ['siam', 'tengku'],
 ['sir', 'mother', 'dies'],
 ['treaty', 'talks'],
 ['should', 'win', 'british'],
 ['lib-socs', 'again', 'on', 'boycott'],
 ['blast', 'school'],
 ['next', 'merdeka'],
 ['red', 'flags'],
 ['future', 'ahead'],
 ['people', 'get', 'more', 'say', 'war', 'on', 'reds'],
 ['malaya', 'without', 'shot'],
 ['needle', 'boycott'],
 ['boy', 'hurt'],
 ['road', 'crash', 'dead'],
 ['paris', 'talks'],
 ['thugs', 'knife', 'man', 'as', '60', 'watch'],
 ['merdeka', 'drive'],
 ['they', 'will', 'be', "malaya's"],
 ['smiles', 'first', 'time', 'after', 'years', 'pain', '8'],
 ['naval', 'fly-past', 'today'],
 ['19', 'are', 'now'],
 ['royal', 'romance', 'is'],
 ['men', 'were', 'fit'],
 ['rubber', 'poland'],
 ['quits', 'council'],
 ['court', 'major'],
 ['is', 'growing'],
 ['fly-past', 'over', 'colony'],
 ['$5,000', '\xe2\x80\x94', 'two', 'share'],
 ['flying', 'officer'],
 ['what', '5', 'wrong', 'with', 'our', 'asked', 'marshall'],
 ['payroll', 'is'],
 ['hostess'],
 ['off', 'u.k.'],
 ['merdeka', 'hopes', 'good'],
 ['happy', 'news', 'marshall', 'as', 'he', 'listens', 'lloyd'],
 ['flash', 'tengku', 'bride'],
 ['are', 'clue', 'hold-up'],
 ["can't", 'overthrow', 'tengku'],
 ['asian'],
 ['130', 'flee', 'blazing', 'homes'],
 ['two', 'stabbed', 'gang', 'fight'],
 ['tengku', 'weds', 'princess', 'secret'],
 ['he', 'approves', 'violence'],
 ['johore', 'fights', 'fire'],
 ['wage', 'claims', 'by'],
 ['at', 'royal'],
 ['co.', 'fly', 'say:', 'show', 'us', 'good'],
 ['starts', 'on', 'quiet', 'note'],
 ['man', 'who', '$5,000', 'month'],
 ['12', 'sa', 'ved', 'after', 'that', 'was', 'no', 'picnic'],
 ['fire', 'guts', '9', 'houses'],
 ['towkay', 'held', '7', 'days'],
 ['hunger', 'led', 'red', 'surrender'],
 ['on', 'way', 'home', 'from', 'talks'],
 ['man', 'out', "'no"],
 ['merdeka', 'on', 'raffles', 'place', 'leaves', 'car'],
 ['bid'],
 ['royal'],
 ['mystery', 'cyprus', 'mission'],
 ['$6', 'day', 'claim'],
 ['mr.', "m's", 'posters', 'stolen'],
 ['call'],
 ['mission', "china'"],
 ['anti-merdeka', 'angers', 'mr.', 'm'],
 ['council', 'urged', 'back', 'tengku'],
 ['your', 'paper', 'today'],
 ['emergency', "can't", 'end', 'without', 'support'],
 ['bandits', 'jungle'],
 ['federation', 'is', 'on', 'its', 'way'],
 ['wanted', 'rule'],
 ['meet'],
 ['top', 'two', 'women', 'killed'],
 ['drowning', 'man', 'rescued'],
 ['guard', 'dulles'],
 ['work', 'starts', 'on', 'merdeka', 'forces', 'council'],
 ['boac', 'man', 'lost'],
 ['new'],
 ['give', 'way'],
 ['strike', 'threat', 'is', 'off'],
 ['m.p.', 'with', 'loaf', 'hand', 'eye'],
 ['nanyang', 'opens', '\xe2\x80\x94', '1,000', 'cheer'],
 ['two', 'reds', 'killed', 'three', 'escape'],
 ['no', 'big', 'rallies', 'off'],
 ['on', 'colony'],
 ['blazing', 'oil', 'sets', 'river', 'on', 'fire'],
 ['merdeka', 'men', 'named'],
 [],
 ['merdeka', 'men', 'plan'],
 ['arbitration', 'mines', 'dispute'],
 [],
 ['border', 'threat'],
 ['round', 'world', '-in', 'march', 'mm'],
 ['trade', 'pact'],
 ['all', 'set', 'big', 'rally', 'tomorrow'],
 ['$350,000', 'prize', 'pahang'],
 ['$20,000:', 'is', 'your', 'name'],
 ['4'],
 ['wins'],
 ['all', 'ready', 'merdeka', 'rally'],
 ['no', 'says', 'marshall'],
 ['still', 'says'],
 [],
 ["it's", 'so', 'nice', 'be'],
 ['up', 'food'],
 ['riot', 'at', 'rally:', '50', 'hurt'],
 ['on', 'building', 'left', 'just', 'time'],
 ['daring', 'arms', 'raid', 'by', 'reds'],
 ['it', 'started', 'like', 'this'],
 ['like', 'this', '"'],
 ['mr.', 'm:', 'end', 'or', 'govt.', 'quits'],
 ['merdeka', 'resolution'],
 ['reds', 'kill', 'constable'],
 ['all-out', 'now'],
 ['so', 'halt', 'our', 'demands', 'merdeka'],
 ['probe', 'by', 'minister'],
 ['injured', 'by'],
 ['jail'],
 ['21', 'held', 'after', 'raid', 'on', 'village'],
 ['go-slow', 'at', '2', 'quit'],
 ['secret', 'talks', 'on', 'reds'],
 ['wanted:', 'labour', 'front'],
 ['pay', 'demand', 'rejected', 'by', 'estates'],
 ['paper', 'is', 'ready'],
 [],
 ['opium'],
 ['now'],
 ['crippled', 'by', 'strikes'],
 ['mr.', 'de', 'cruz', 'starts', 'with', 'talk'],
 ['wife', 'husband', 'escapes'],
 ['hotel', 'slaying', 'riddle'],
 ['terms'],
 ['man', 'light', 'suit'],
 ['ghost', 'by', 'bus'],
 ['rain', 'drives', 'plane', 'back', 'colony'],
 ['-by', 'mr.', 'marshall'],
 ['wanted:', 'on', 'reds'],
 ['talks'],
 ['10', 'ships', 'go', 'slow', 'port'],
 ['four', 'killed', 'mine'],
 ['local', 'men', 'show', 'your', 'hand'],
 ['row', 'over'],
 ['malay', 'is'],
 ['tengku', 'tours', 'johore'],
 ['boom', 'time', 'malacca'],
 ['stabbed', 'man', 'ill'],
 ["grace's", 'quiet'],
 [],
 ['make', 'all', 'pay', '$1', 'land'],
 ['best', 'only', 'asia'],
 ['tengku:', 'no', 'merger', 'just', 'now'],
 ['go-slow', 'dispute', 'may', 'end', 'today'],
 ['two', 'hurt', 'auster', 'crash'],
 ['police', 'kill', 'kidnappers'],
 ['mil.', 'barracks', 'plan', 'protest'],
 ['flies', 'this'],
 ['led', 'safety'],
 ['pakistan', 'becomes'],
 ["pilot's", 'find', 'led', 'kill'],
 ['50,000', 'workers', 'want', 'month', 'pay'],
 ['go-slow', 'men', 'plan', 'halt'],
 ['varsity'],
 ['man', 'riddle', 'is'],
 ['ban', 'n', 'by'],
 ['m', 'more'],
 ['no', 'japs'],
 ['strike', 'boys', 'warned'],
 ['lord', 'returns'],
 ['first', 'sea', 'lord', 'here'],
 ['mr.', 'marshall', 'star'],
 ['they', 'share', '$5.000'],
 ['bank', 'strike', 'averted'],
 ['sir', 'due', 'tomorrow'],
 ['homage', 'hero'],
 [],
 ['ship', 'colony'],
 ['19', 'warships', 'into', 'mock', 'battle'],
 ['rail', 'strike', 'threat'],
 ['merdeka:', 'secret', 'poll', 'demand'],
 ['p.a.p.', 'up', 'rally', 'merdeka'],
 ['marshall:', 'secret', 'poll'],
 ['merdeka:', '3'],
 ['there', "won't", 'be', 'enough'],
 ['malayan', '$', 'falls'],
 ['400', 'fans', 'leave', 'mr.', 'limp'],
 ['colony', 'queen'],
 ['official', 'warns', 'union'],
 ['china'],
 ['cheques', 'mm', 'prize'],
 ['freight', 'by', 'rail', 'dearer', 'from', 'sunday'],
 ['safety'],
 ['their', 'joy', 'was'],
 ['fight', 'save', "'copter"],
 ['m.'],
 ['death', 'clue', 'search', 'car'],
 ['british'],
 ['kidnap', "man'", 'is', 'held'],
 ['homage', 'royal'],
 ['rahman', 'kills', 'rumour'],
 ['is', 'over'],
 ["'no", 'talk', 'shocks', 'colony'],
 ['reds', 'halt', 'lorry', 'kill', 'worker'],
 ['tapper', 'pay', 'cut', '\xe2\x80\x94', 'union'],
 ['kidnappers', 'who', 'died'],
 ['why', 'marry'],
 ['lift', 'ban', 'on', 'rubber', 'export', 'call'],
 ['merdeka', 'group', 'is'],
 [],
 ['3', 'men', 'held', 'police', 'ambush'],
 ['race', 'against'],
 ['hopes', 'success'],
 ['run', 'will', 'expose'],
 ['take', 'it', "it's"],
 ['went'],
 ['reward'],
 ['tengku'],
 ['merdeka', 'trip', 'will', 'cost'],
 ['ipoh', 'dies'],
 ['funeral', 'mrs.'],
 ['atomic', 'power', 'malaya?', 'yet'],
 ['go-slow:', 'rival', 'union', 'steps'],
 ['wedding', 'bells', 'girls'],
 ['needle', 'out'],
 ['bandits', '2'],
 ['i', "'on", 'star', 'i', "i'd", 'like', 'live', 'i', 'while', 'j'],
 [],
 ['marshall', 'meet', 'europeans'],
 ['engine', 'off'],
 ['death'],
 ['ban', 'he', 'urges'],
 ['colonial', 'schools:', 'attack', 'by', 'union'],
 ['million', 'more', 'from', 'me'],
 ['freak', 'storm', 'fun', 'crowds'],
 ['new', 'railway', 'link', 'will', 'challenge', 'trade'],
 ['petrol', 'lost,', 'out'],
 ['rose', 'singapore'],
 ['big', 'exodus'],
 ['2', 'students'],
 ['death'],
 ['margaret', "won't", 'be', 'there'],
 ['hit', 'by'],
 ['won', 'dies'],
 ['red', 'peace', 'bid', 'probe'],
 ['tengku', 'calls', 'all'],
 ['has', 'store'],
 ['$5,000', 'wedding'],
 ['thieves', 'miss', 'big', 'haul'],
 ['cross', 'line', 'economy', 'ran'],
 ['man', 'dies', 'at'],
 ['russian', 'offer'],
 ['seek', 'tons', 'it'],
 ['protest', 'march'],
 ['18', 'siamese', 'plane', 'disaster'],
 ['tengku', "it's", 'war'],
 ['record'],
 ['boy', 'grips', 'pen', 'with', 'his'],
 ['2', 'killed'],
 ['airliner', 'delayed', '17', 'hours'],
 ['big', 'moment', 'marry'],
 ['tengku', 'changes', 'his', 'cabinet'],
 ['better'],
 ['ipoh', 'driver', 'wins', 'economy', 'run'],
 ['jumat', 'wants', 'merdeka'],
 ['demand', 'by', 'umno', 'party'],
 ['beaten', 'up', 'by', '20', 'men'],
 ['king', 'at', 'u.k.'],
 ['merdeka:', 'all', 'agreed'],
 ['$20,000:', 'seven', 'days', 'left'],
 ['yes,', 'say', "do'", 'i'],
 ['rahman', 'sets', 'new', 'war', 'plans'],
 ['youth', 'accused'],
 ...]

In [ ]: