In [2]:
import nltk
import pandas as pd

In [3]:
df=pd.read_csv('data/pov_seedwords.txt',delimiter='\t')

In [4]:
df.head()


Out[4]:
id corpus intro section length body subject country
0 1 news 1 of 1792 DOCUMENTS //p //p The New York Time... Section A; Column 0; Business/Financial Desk; ... 1715 words MIAMI -- For the Ingram clan, working for the... PUBLIC TRANSPORTATION (90%); MIDDLE INCOME PER... UNITED STATES (96%)
1 2 news 2 of 1792 DOCUMENTS //p //p The New York Time... Section A; Column 0; Business/Financial Desk; ... 1832 words WAXAHACHIE, Tex. -- Most Americans suffered s... SENIOR CITIZENS (91%); MIDDLE INCOME PERSONS (... UNITED STATES (94%)
2 3 news 3 of 1792 DOCUMENTS //p //p The New York Time... Section A; Column 0; Business/Financial Desk; ... 1759 words When the California Labor Commissioner's Offic... LABOR FORCE (90%); FREELANCE EMPLOYMENT (90%);... UNITED STATES (95%)
3 4 news 4 of 1792 DOCUMENTS //p //p The New York Time... Section A; Column 0; National Desk; Pg. 1 1506 words SAN BERNARDINO, Calif. -- A heavily armed man ... SHOOTINGS (92%); GUNSHOT WOUNDS (89%); FIREARM... UNITED STATES (94%)
4 5 news 5 of 1792 DOCUMENTS //p //p The New York Time... Section A; Column 0; National Desk; Pg. 1 1075 words More than one a day. //p That is how often, on... SHOOTINGS (92%); WOUNDS & INJURIES (90%); GUNS... UNITED STATES (94%)

In [7]:


In [9]:
word_tok=nltk.tokenize.RegexpTokenizer(r'\b(\w+(?:[\'\-]\w+)?)\b')

In [10]:
words_in_texts=[]
for doc in df.body:
    words_in_texts.extend(word_tok.tokenize(doc.lower()))

In [25]:
collocates = nltk.BigramCollocationFinder.from_words(words_in_texts, window_size=4)

In [36]:
import copy
poverty = copy.copy(collocates)

In [34]:
poverty.apply_freq_filter(50)

In [35]:
poverty.ngram_fd


Out[35]:
FreqDist({('to', 'foreign'): 52,
          ('born', 'in'): 96,
          ('of', 'obama'): 88,
          ('rise', 'in'): 75,
          ('sanders', 'vermont'): 50,
          ('who', 'a'): 508,
          ('in', 'washington'): 248,
          ('sanders', 'the'): 89,
          ('of', 'technology'): 58,
          ('to', 'act'): 63,
          ('them', 'the'): 398,
          ('course', 'the'): 76,
          ('said', 'no'): 92,
          ('5', 'in'): 50,
          ('of', 'islamic'): 102,
          ('less', 'and'): 51,
          ('might', 'be'): 192,
          ('against', 'p'): 121,
          ('the', 'over'): 252,
          ('on', 'media'): 60,
          ('to', 'people'): 380,
          ('p', 'editor'): 83,
          ('an', 'era'): 50,
          ('not', "it's"): 50,
          ('name', 'the'): 60,
          ('has', 'on'): 193,
          ('mrs', 'the'): 80,
          ('the', 'things'): 84,
          ('bush', 'to'): 68,
          ('then', 'in'): 71,
          ('use', 'to'): 104,
          ('used', 'by'): 66,
          ('federal', 'court'): 56,
          ('the', 'how'): 108,
          ('both', 'sides'): 71,
          ('for', 'at'): 192,
          ('that', 'him'): 80,
          ('in', 'this'): 636,
          ('the', 'iowa'): 60,
          ('workers', 'p'): 58,
          ('the', 'about'): 312,
          ('mr', 'cuomo'): 76,
          ('percent', 'for'): 75,
          ('and', 'each'): 50,
          ('would', 'like'): 78,
          ('we', 'could'): 96,
          ('will', 'take'): 69,
          ('last', 'it'): 50,
          ('to', 'around'): 73,
          ('office', 'to'): 50,
          ('moved', 'to'): 132,
          ('and', 'several'): 63,
          ('said', 'has'): 79,
          ('of', 'history'): 70,
          ('republican', 'presidential'): 130,
          ('law', 'p'): 104,
          ('pay', 'p'): 57,
          ('so', 'p'): 129,
          ('people', 'he'): 70,
          ('students', 'to'): 71,
          ('like', 'are'): 63,
          ('programs', 'the'): 64,
          ('p', 'once'): 57,
          ('those', 'not'): 53,
          ('not', 'even'): 72,
          ("doesn't", 'the'): 59,
          ('new', 'for'): 116,
          ('at', 'any'): 65,
          ('to', 'offer'): 87,
          ('issue', 'p'): 61,
          ('he', 'added'): 267,
          ('director', 'the'): 236,
          ('problem', 'that'): 65,
          ('president', 'and'): 151,
          ('poor', 'and'): 117,
          ('spent', 'of'): 50,
          ('with', 'many'): 71,
          ('version', 'of'): 131,
          ('all', 'and'): 160,
          ('see', 'it'): 54,
          ('out', 'you'): 61,
          ('the', 'set'): 64,
          ('center', 'for'): 143,
          ('people', 'have'): 227,
          ('to', 'create'): 133,
          ('changes', 'to'): 61,
          ('the', 'senate'): 380,
          ('that', 'america'): 60,
          ('by', 'united'): 69,
          ('inequality', 'the'): 182,
          ('attorney', 'general'): 79,
          ('as', 'has'): 134,
          ('left', 'the'): 182,
          ('today', 'is'): 57,
          ('is', 'best'): 53,
          ('spoke', 'of'): 53,
          ('a', 'while'): 114,
          ('years', 'that'): 88,
          ('at', 'university'): 419,
          ('the', 'end'): 358,
          ('feel', 'the'): 55,
          ('a', 'letter'): 74,
          ('spent', 'the'): 61,
          ('to', 'without'): 72,
          ('speech', 'the'): 76,
          ('is', 'hard'): 54,
          ('seems', 'to'): 199,
          ('the', 'ladder'): 57,
          ('to', 'economy'): 53,
          ('an', 'with'): 151,
          ('and', 'very'): 74,
          ('a', 'shot'): 52,
          ('000', 'and'): 88,
          ('that', 'day'): 53,
          ('pope', 'francis'): 85,
          ('the', 'could'): 277,
          ('about', 'policy'): 51,
          ('justice', 'system'): 56,
          ('on', 'as'): 171,
          ('to', 'water'): 57,
          ('of', 'other'): 215,
          ('to', 'end'): 128,
          ("it's", 'like'): 59,
          ('working', 'with'): 74,
          ('the', 'without'): 84,
          ('is', 'an'): 456,
          ('into', 'and'): 177,
          ('what', 'have'): 86,
          ('in', 'october'): 72,
          ('gun', 'the'): 71,
          ('by', 'this'): 58,
          ('her', 'p'): 330,
          ('a', 'long'): 221,
          ('the', 'months'): 138,
          ('as', 'other'): 54,
          ('does', 'the'): 108,
          ('and', 'cruz'): 59,
          ('but', 'those'): 63,
          ('before', 'to'): 70,
          ('before', 'in'): 52,
          ('legislation', 'the'): 66,
          ('said', 'interview'): 135,
          ('to', 'stay'): 93,
          ('of', 'making'): 75,
          ('san', 'bernardino'): 173,
          ('it', 'at'): 102,
          ('of', 'one'): 204,
          ('i', 'and'): 165,
          ('on', 'their'): 223,
          ('a', 'response'): 65,
          ('at', 'mr'): 102,
          ('our', 'in'): 104,
          ('vote', 'in'): 60,
          ('tend', 'to'): 109,
          ('law', 'school'): 50,
          ('chief', 'the'): 71,
          ('not', 'going'): 105,
          ('from', 'that'): 165,
          ('official', 'said'): 79,
          ('and', 'into'): 147,
          ('the', 'victims'): 98,
          ('issues', 'that'): 56,
          ('in', 'long'): 73,
          ('between', 'and'): 570,
          ('in', 'los'): 52,
          ('was', 'after'): 66,
          ('york', 'state'): 83,
          ('one', 'day'): 55,
          ('the', 'investigation'): 113,
          ('p', 'still'): 230,
          ('and', 'become'): 50,
          ('for', 'families'): 70,
          ('are', 'with'): 228,
          ('in', 'most'): 153,
          ('about', 'he'): 94,
          ('after', 'and'): 68,
          ('the', 'government'): 947,
          ('that', 'his'): 361,
          ('at', 'her'): 63,
          ('those', 'in'): 166,
          ('in', 'short'): 51,
          ('arguing', 'that'): 59,
          ('they', 'get'): 85,
          ('monday', 'the'): 56,
          ('parents', 'and'): 60,
          ('sent', 'to'): 103,
          ('a', 'when'): 238,
          ('the', 'do'): 150,
          ('the', 'had'): 770,
          ('and', 'others'): 199,
          ('the', 'health'): 216,
          ('where', 'she'): 64,
          ('election', 'the'): 75,
          ('americans', 'in'): 75,
          ('high', 'the'): 81,
          ('grew', 'up'): 119,
          ('and', 'down'): 62,
          ('the', 'speaker'): 51,
          ('they', 'about'): 75,
          ('could', 'be'): 459,
          ('politics', 'p'): 116,
          ('reduce', 'the'): 98,
          ('the', 'deal'): 233,
          ('of', 'over'): 137,
          ('to', '15'): 63,
          ('is', 'good'): 86,
          ('he', 'mr'): 196,
          ('view', 'of'): 80,
          ('no', 'a'): 77,
          ('israel', 'the'): 55,
          ('republican', 'and'): 151,
          ('that', 'make'): 152,
          ('to', 'middle'): 55,
          ('hillary', 'clinton'): 274,
          ('to', 'was'): 143,
          ('so', 'are'): 64,
          ('first', 'to'): 185,
          ('advantage', 'of'): 67,
          ('p', 'it'): 966,
          ('both', 'the'): 205,
          ('federal', 'the'): 89,
          ('civil', 'liberties'): 63,
          ('has', 'more'): 167,
          ('the', 'f'): 388,
          ('of', 'refugees'): 60,
          ('be', 'as'): 224,
          ('offered', 'a'): 58,
          ('has', 'mr'): 59,
          ('they', 'mr'): 63,
          ('not', 'at'): 93,
          ('perhaps', 'the'): 73,
          ('i', "don't"): 301,
          ('so', 'we'): 91,
          ('p', 'where'): 61,
          ('you', 'get'): 95,
          ('his', 'from'): 81,
          ('at', 'news'): 80,
          ('in', 'speech'): 106,
          ('a', 'history'): 85,
          ('obama', 'and'): 117,
          ('in', 'years'): 366,
          ('trump', 'is'): 120,
          ('of', 'income'): 222,
          ('was', 'too'): 56,
          ('what', 'are'): 123,
          ('to', 'control'): 76,
          ('members', 'to'): 55,
          ('democrats', 'and'): 57,
          ('from', 'with'): 61,
          ('and', 'may'): 101,
          ('to', 'whether'): 105,
          ('away', 'the'): 117,
          ('the', 'judge'): 91,
          ('for', 'one'): 106,
          ('a', 'senator'): 82,
          ('a', 'period'): 66,
          ('vote', 'for'): 78,
          ('all', 'it'): 53,
          ('free', 'to'): 52,
          ('of', 'policies'): 71,
          ('saying', 'that'): 139,
          ('head', 'the'): 80,
          ('in', 'way'): 238,
          ('had', 'his'): 120,
          ('5', 'million'): 84,
          ('on', 'we'): 83,
          ('he', 'of'): 217,
          ('news', 'and'): 95,
          ('many', 'have'): 153,
          ('central', 'the'): 63,
          ('a', 'speech'): 137,
          ('he', 'one'): 66,
          ('with', 'it'): 125,
          ('oil', 'and'): 74,
          ('as', 'more'): 86,
          ('was', 'what'): 63,
          ('the', 'prospect'): 57,
          ('and', 'only'): 121,
          ('time', 'as'): 58,
          ('of', 'center'): 59,
          ('are', 'be'): 96,
          ('police', 'department'): 77,
          ('spending', 'on'): 58,
          ('from', 'the'): 2109,
          ('the', 'local'): 95,
          ('public', 'p'): 140,
          ('with', 'them'): 77,
          ('among', 'them'): 52,
          ('and', 'new'): 238,
          ('black', 'lives'): 56,
          ('more', 'is'): 73,
          ('who', 'were'): 239,
          ('to', 'comment'): 89,
          ('the', 'called'): 92,
          ('p', 'e'): 51,
          ('different', 'the'): 78,
          ('while', 'was'): 67,
          ('came', 'from'): 79,
          ('are', 'they'): 103,
          ('an', 'economic'): 53,
          ('that', 'about'): 104,
          ('told', 'the'): 201,
          ('to', 'sign'): 60,
          ('one', 'most'): 141,
          ('that', 'can'): 378,
          ('consider', 'the'): 61,
          ('she', 'of'): 74,
          ('had', 'and'): 151,
          ('attention', 'the'): 66,
          ('him', 'as'): 124,
          ('p', 'has'): 695,
          ('the', 'author'): 64,
          ('are', 'on'): 236,
          ('i', 'just'): 69,
          ('party', 'to'): 87,
          ('the', 'all'): 179,
          ('an', 'increase'): 61,
          ('she', 'was'): 464,
          ('twitter', 'and'): 514,
          ('of', 'climate'): 69,
          ('and', 'when'): 218,
          ('during', 'a'): 131,
          ('p', 'he'): 999,
          ('and', 'killed'): 61,
          ('clinton', 'p'): 78,
          ('a', 'handful'): 56,
          ('to', 'away'): 75,
          ('the', 'west'): 163,
          ('me', 'twitter'): 83,
          ('you', 'he'): 55,
          ('about', 'that'): 161,
          ('income', 'inequality'): 273,
          ('the', 'officers'): 107,
          ('of', 'including'): 156,
          ('officials', 'p'): 111,
          ('whether', 'a'): 57,
          ('and', 'first'): 100,
          ('one', 'and'): 143,
          ('the', 'team'): 95,
          ('the', 'years'): 528,
          ('this', 'i'): 80,
          ('united', 'has'): 81,
          ('work', 'force'): 56,
          ('we', 'will'): 169,
          ('it', 'unclear'): 51,
          ('data', 'the'): 118,
          ('washington', 'a'): 60,
          ('the', 'being'): 141,
          ('is', 'and'): 586,
          ('the', 'goal'): 76,
          ('obama', 'p'): 65,
          ('a', 'just'): 72,
          ('law', 'a'): 54,
          ('time', 'is'): 73,
          ('at', 'which'): 64,
          ('enforcement', 'the'): 58,
          ('the', 'biggest'): 160,
          ('p', 'since'): 134,
          ('but', 'in'): 432,
          ('to', 'time'): 64,
          ('a', 'moment'): 92,
          ('asked', 'if'): 55,
          ('it', 'be'): 520,
          ('his', 'wife'): 207,
          ('such', 'p'): 63,
          ('a', 'church'): 50,
          ('the', 'greatest'): 67,
          ('would', 'more'): 82,
          ('to', 'education'): 60,
          ('she', 'she'): 145,
          ('supporters', 'of'): 70,
          ('keep', 'the'): 118,
          ('p', "that's"): 143,
          ('conservative', 'and'): 53,
          ('done', 'the'): 70,
          ('programs', 'and'): 58,
          ('day', 'p'): 97,
          ('without', 'a'): 109,
          ('like', 'the'): 473,
          ('security', 'and'): 154,
          ('for', 'months'): 76,
          ('that', 'get'): 53,
          ('problem', 'p'): 68,
          ('are', 'often'): 114,
          ('large', 'of'): 119,
          ('p', 'to'): 1123,
          ('be', 'with'): 172,
          ('family', 'p'): 94,
          ('by', 'her'): 58,
          ('among', 'p'): 59,
          ('e', 'c'): 53,
          ('even', 'more'): 135,
          ('in', 'country'): 308,
          ('that', 'means'): 53,
          ('have', 'p'): 234,
          ('to', 'set'): 57,
          ('fight', 'the'): 97,
          ('me', 'to'): 95,
          ('americans', 'are'): 83,
          ('right', 'in'): 55,
          ('p', 'case'): 84,
          ('so', 'could'): 56,
          ('much', 'p'): 71,
          ('the', 'number'): 315,
          ('said', 'that'): 1187,
          ('than', 'an'): 51,
          ('forces', 'the'): 76,
          ('order', 'the'): 51,
          ('source', 'of'): 92,
          ('care', 'and'): 135,
          ('after', 'to'): 61,
          ('their', 'with'): 82,
          ('iran', 'and'): 52,
          ('a', 'now'): 73,
          ('was', 'or'): 64,
          ('can', 'get'): 57,
          ('not', 'in'): 404,
          ('a', 'they'): 188,
          ('to', 'engage'): 50,
          ('000', 'of'): 64,
          ('at', 'of'): 880,
          ('me', 'and'): 117,
          ('social', 'security'): 110,
          ('a', 'bad'): 77,
          ('time', 'on'): 58,
          ('other', 'that'): 130,
          ('the', 'saying'): 73,
          ('a', 'former'): 408,
          ('job', 'p'): 61,
          ('the', 'proposed'): 55,
          ('new', 'of'): 199,
          ('of', 'states'): 210,
          ('mr', 'had'): 299,
          ('child', 'and'): 54,
          ('he', 'left'): 53,
          ('having', 'a'): 87,
          ('of', 'them'): 438,
          ('a', 'where'): 189,
          ('so', 'the'): 348,
          ('were', 'p'): 205,
          ('in', 'were'): 103,
          ('and', 'sanders'): 57,
          ('more', 'they'): 62,
          ('my', 'and'): 140,
          ('the', 'comes'): 53,
          ('for', 'as'): 176,
          ('the', 'senator'): 140,
          ('go', 'p'): 70,
          ('program', 'p'): 51,
          ('his', 'the'): 677,
          ('ago', 'p'): 60,
          ('democratic', 'the'): 63,
          ('came', 'in'): 67,
          ('a', 'less'): 56,
          ('two', 'and'): 116,
          ('be', 'mr'): 97,
          ('is', 'how'): 105,
          ('the', 'itself'): 59,
          ('that', 'could'): 477,
          ('other', 'for'): 51,
          ('the', 'effects'): 92,
          ('of', 'some'): 167,
          ('a', 'from'): 514,
          ('debate', 'the'): 114,
          ('of', 'she'): 189,
          ('the', 'report'): 260,
          ('to', 'look'): 92,
          ('around', 'p'): 110,
          ('have', 'been'): 1204,
          ('in', 'chicago'): 50,
          ('the', 'chance'): 55,
          ('data', 'that'): 51,
          ('governor', 'of'): 74,
          ('commitment', 'to'): 66,
          ('we', 'all'): 111,
          ('how', 'much'): 157,
          ('to', 'better'): 106,
          ('the', 'video'): 104,
          ('the', 'bronx'): 63,
          ('case', 'for'): 56,
          ('well', 'as'): 301,
          ('for', 'his'): 387,
          ('the', 'interest'): 55,
          ('with', 'or'): 112,
          ('north', 'charleston'): 59,
          ('doing', 'the'): 73,
          ('one', 'point'): 57,
          ('clinton', 'in'): 91,
          ('that', 'say'): 56,
          ('country', 'to'): 95,
          ('and', 'was'): 585,
          ('was', 'on'): 327,
          ('the', 'banks'): 52,
          ('as', 'the'): 1896,
          ('e', 'a'): 125,
          ('people', 'they'): 84,
          ('question', 'whether'): 61,
          ('ways', 'that'): 59,
          ('to', 'work'): 397,
          ('likely', 'be'): 113,
          ('laws', 'and'): 51,
          ('of', 'change'): 92,
          ('since', 'and'): 50,
          ('p', 'every'): 64,
          ('the', 'federal'): 523,
          ('need', 'the'): 103,
          ('more', 'years'): 90,
          ('behind', 'p'): 52,
          ('two', 'in'): 107,
          ('several', 'the'): 82,
          ('in', 'that'): 1226,
          ('held', 'a'): 56,
          ('an', 'independent'): 55,
          ('and', 'get'): 147,
          ('have', 'become'): 105,
          ('higher', 'than'): 69,
          ('give', 'to'): 70,
          ('involved', 'the'): 95,
          ('70', 'percent'): 53,
          ('is', 'a'): 2450,
          ('who', 'made'): 50,
          ('women', 'who'): 119,
          ('the', 'schools'): 67,
          ('to', 'an'): 700,
          ('the', 'foundation'): 180,
          ('free', 'and'): 54,
          ('the', 'former'): 316,
          ('get', 'out'): 78,
          ('politics', 'newsletter'): 67,
          ('in', 'high'): 60,
          ('in', '2016'): 101,
          ('addition', 'to'): 81,
          ('wrote', 'a'): 80,
          ('campaign', 'that'): 82,
          ('years', 'to'): 156,
          ('and', 'said'): 462,
          ('that', 'after'): 58,
          ('to', 'develop'): 52,
          ('mr', "obama's"): 188,
          ('so', 'you'): 59,
          ('just', 'for'): 58,
          ('had', 'no'): 130,
          ('time', 'said'): 55,
          ('not', 'clear'): 60,
          ('is', 'he'): 180,
          ('american', 'in'): 152,
          ('and', 'would'): 289,
          ('from', 'said'): 53,
          ('be', 'and'): 340,
          ('and', 'among'): 94,
          ('funding', 'for'): 60,
          ('in', 'san'): 154,
          ('to', 'attention'): 59,
          ('believe', 'in'): 53,
          ('to', 'home'): 94,
          ('of', 'poor'): 110,
          ('issue', 'is'): 55,
          ('think', "it's"): 50,
          ('marriage', 'in'): 52,
          ('a', 'high'): 139,
          ('to', 'things'): 75,
          ('that', 'some'): 160,
          ('were', 'by'): 200,
          ('president', 'said'): 62,
          ('well', 'the'): 190,
          ('violence', 'and'): 51,
          ('on', 'island'): 54,
          ('win', 'the'): 83,
          ('making', 'the'): 114,
          ('in', 'school'): 107,
          ('had', 'in'): 304,
          ('public', 'in'): 113,
          ('used', 'be'): 50,
          ('spokesman', 'for'): 90,
          ('in', 'industry'): 62,
          ('a', 'small'): 216,
          ('should', 'to'): 175,
          ('because', 'not'): 79,
          ('that', 'time'): 108,
          ('p', 'mr'): 2122,
          ('city', 'and'): 95,
          ('been', 'by'): 335,
          ('the', 'right'): 447,
          ('given', 'the'): 158,
          ('at', 'is'): 68,
          ('what', 'about'): 72,
          ('university', 'of'): 366,
          ('a', 'broader'): 68,
          ('we', 'now'): 52,
          ('leaders', 'to'): 77,
          ('right', 'and'): 91,
          ('to', 'any'): 192,
          ('all', 'in'): 150,
          ('future', 'p'): 64,
          ('chairman', 'of'): 140,
          ('under', 'p'): 61,
          ('every', 'p'): 61,
          ('evidence', 'the'): 66,
          ('of', 'do'): 58,
          ('of', 'weapons'): 53,
          ('left', 'in'): 66,
          ('to', 'republican'): 87,
          ('who', 'do'): 66,
          ('was', 'to'): 1302,
          ('to', 'stop'): 149,
          ('the', 'environmental'): 65,
          ('around', 'a'): 56,
          ('get', 'a'): 172,
          ('system', 'that'): 98,
          ('asked', 'a'): 55,
          ('has', 'also'): 145,
          ('a', 'culture'): 64,
          ("it's", 'going'): 50,
          ('to', 'part'): 59,
          ('comes', 'to'): 138,
          ('of', 'where'): 119,
          ('p', 'states'): 94,
          ('department', 'a'): 61,
          ('over', 'past'): 114,
          ('government', 'is'): 76,
          ('campaign', 'and'): 84,
          ('scott', 'walker'): 52,
          ('the', 'police'): 667,
          ('p', 'senator'): 104,
          ('which', 'have'): 95,
          ('a', 'called'): 103,
          ('poverty', 'the'): 130,
          ('in', 'i'): 238,
          ('got', 'to'): 82,
          ('of', 'party'): 118,
          ('for', 'so'): 60,
          ('on', 'are'): 74,
          ('concerned', 'about'): 63,
          ('recent', 'years'): 167,
          ('a', 'local'): 81,
          ('that', 'she'): 352,
          ('the', 'houthis'): 54,
          ('p', 'other'): 201,
          ('the', 'institute'): 156,
          ('that', 'same'): 61,
          ('that', 'much'): 70,
          ('the', 'homeless'): 83,
          ('worked', 'with'): 59,
          ('at', 'end'): 101,
          ('national', 'the'): 94,
          ('of', "that's"): 50,
          ('part', 'of'): 722,
          ('and', "they're"): 54,
          ('the', 'benefits'): 99,
          ('on', 'these'): 51,
          ('i', 'my'): 111,
          ('there', 'a'): 490,
          ('be', 'for'): 306,
          ('man', 'the'): 71,
          ('to', 'obama'): 96,
          ('has', 'p'): 127,
          ('a', 'presidential'): 103,
          ('p', 'and'): 1173,
          ('p', 'well'): 50,
          ('follow', 'me'): 84,
          ('which', 'had'): 82,
          ('engaged', 'in'): 60,
          ('against', 'a'): 98,
          ('other', 'as'): 51,
          ('years', 'later'): 74,
          ('percent', 'said'): 56,
          ('would', 'to'): 621,
          ('of', 'security'): 108,
          ('gay', 'and'): 54,
          ('of', 'groups'): 81,
          ('n', 'y'): 60,
          ('this', 'of'): 340,
          ('national', 'in'): 68,
          ('said', 'if'): 98,
          ('says', 'the'): 86,
          ('with', 'in'): 420,
          ('era', 'of'): 64,
          ('concluded', 'that'): 72,
          ('which', 'is'): 395,
          ('do', 'i'): 58,
          ('our', 'the'): 152,
          ('on', 'whether'): 52,
          ('obama', 'administration'): 163,
          ('put', 'it'): 98,
          ('with', 'about'): 71,
          ('or', 'from'): 50,
          ('the', 'matter'): 78,
          ('and', 'up'): 598,
          ('the', 'my'): 91,
          ('and', 'white'): 62,
          ('in', 'right'): 56,
          ('even', 'of'): 105,
          ('d', 'a'): 95,
          ('to', 'sell'): 70,
          ('to', 'congress'): 66,
          ('concerns', 'about'): 61,
          ('for', 'state'): 83,
          ('the', 'have'): 784,
          ('in', 'of'): 2271,
          ('mr', 'and'): 612,
          ('to', 'every'): 83,
          ('is', 'the'): 2475,
          ('and', 'be'): 248,
          ('path', 'to'): 55,
          ('candidates', 'to'): 70,
          ('for', 'these'): 57,
          ('rather', 'a'): 63,
          ('even', 'with'): 50,
          ('to', 'one'): 289,
          ('one', 'the'): 1032,
          ('they', 'not'): 305,
          ('in', 'debate'): 68,
          ('the', 'trump'): 99,
          ('on', 'campaign'): 58,
          ('10', 'of'): 67,
          ('into', 'an'): 51,
          ('he', 'asked'): 53,
          ('cut', 'the'): 51,
          ('in', 'system'): 51,
          ('the', 'fight'): 134,
          ('officials', 'said'): 262,
          ('even', 'a'): 178,
          ('program', 'to'): 80,
          ('000', 'year'): 69,
          ('the', 'ones'): 108,
          ('and', 'over'): 149,
          ('that', 'when'): 139,
          ('only', 'in'): 137,
          ('all', 'those'): 55,
          ('to', 'nuclear'): 51,
          ('in', 'front'): 105,
          ('of', 'court'): 71,
          ('has', 'never'): 75,
          ('mr', "bush's"): 98,
          ('to', 'win'): 144,
          ('have', 'up'): 67,
          ('there', 'were'): 225,
          ('p', 'republican'): 90,
          ('spoke', 'the'): 91,
          ('to', 'themselves'): 99,
          ('next', 'the'): 90,
          ('in', 'all'): 160,
          ('of', 'florida'): 89,
          ('government', 'of'): 77,
          ('the', 'so-called'): 76,
          ('most', 'recent'): 63,
          ('the', 'very'): 185,
          ('and', 'about'): 290,
          ('of', '10'): 54,
          ('and', 'financial'): 56,
          ('is', 'all'): 104,
          ('so', 'and'): 95,
          ('and', 'according'): 71,
          ('on', 'they'): 120,
          ('happened', 'the'): 50,
          ('a', 'little'): 166,
          ('the', 'issue'): 329,
          ('at', 'they'): 58,
          ('tied', 'to'): 55,
          ('of', 'young'): 100,
          ('or', 'and'): 143,
          ('000', 'to'): 110,
          ('do', 'to'): 233,
          ('this', 'was'): 203,
          ('you', 'be'): 73,
          ('p', 'among'): 74,
          ('the', 'but'): 1003,
          ('that', 'any'): 79,
          ('that', 'might'): 161,
          ('because', 'he'): 146,
          ('all', 'is'): 68,
          ('part', 'a'): 163,
          ('they', 'say'): 95,
          ('with', 'from'): 104,
          ('government', 'and'): 181,
          ('in', 'has'): 252,
          ('had', 'not'): 207,
          ('only', 'percent'): 113,
          ('food', 'stamps'): 74,
          ('have', 'so'): 55,
          ('to', 'campaign'): 74,
          ('but', 'if'): 129,
          ('an', 'but'): 85,
          ('on', 'an'): 177,
          ('you', "can't"): 94,
          ('those', 'and'): 80,
          ('he', 'for'): 195,
          ('the', 'opinion'): 364,
          ('decision', 'the'): 89,
          ('hold', 'the'): 67,
          ('been', 'with'): 111,
          ('things', 'that'): 67,
          ('months', 'the'): 113,
          ('the', 'impact'): 108,
          ('that', 'or'): 80,
          ('long', 'of'): 78,
          ('government', 'p'): 150,
          ('showed', 'that'): 71,
          ('fact', 'the'): 93,
          ('states', 'and'): 273,
          ('than', 'as'): 53,
          ('here', 'that'): 50,
          ('conference', 'in'): 51,
          ('them', 'with'): 82,
          ('the', 'we'): 506,
          ('a', 'couple'): 100,
          ('in', 'what'): 130,
          ('past', 'years'): 100,
          ('himself', 'as'): 69,
          ('to', 'support'): 228,
          ('you', 'up'): 51,
          ('known', 'for'): 65,
          ('of', 'with'): 431,
          ('senator', 'sanders'): 89,
          ('immigration', 'and'): 61,
          ('great', 'of'): 60,
          ('them', 'of'): 66,
          ('in', '2011'): 191,
          ('today', 'newsletter'): 350,
          ('this', 'been'): 54,
          ('of', 'both'): 85,
          ('media', 'and'): 66,
          ('were', 'with'): 124,
          ('support', 'the'): 224,
          ('real', 'estate'): 123,
          ('the', 'early'): 142,
          ('to', 'them'): 437,
          ('i', 'want'): 154,
          ('these', 'p'): 65,
          ('at', 'top'): 85,
          ('more', 'a'): 335,
          ('some', 'said'): 54,
          ('of', 'inequality'): 207,
          ('one', 'that'): 259,
          ('officials', 'and'): 95,
          ('how', 'are'): 81,
          ('was', 'no'): 146,
          ('the', 'after'): 247,
          ('like', 'a'): 332,
          ('but', 'is'): 458,
          ('to', 'research'): 53,
          ('recent', 'the'): 122,
          ('the', 'here'): 132,
          ('time', 'in'): 190,
          ('and', 'bush'): 51,
          ('2008', 'the'): 54,
          ('president', 'for'): 55,
          ('to', 'hold'): 98,
          ('appears', 'to'): 93,
          ('executive', 'director'): 104,
          ('white', 'the'): 99,
          ('issue', 'of'): 88,
          ('hours', 'the'): 56,
          ('the', 'still'): 146,
          ('less', 'than'): 323,
          ('pay', 'for'): 160,
          ('economic', 'that'): 76,
          ('for', 'years'): 371,
          ('young', 'and'): 76,
          ('world', 'is'): 83,
          ('in', 'interview'): 325,
          ('parents', 'the'): 58,
          ('a', 'bill'): 119,
          ('the', 'instead'): 58,
          ('making', 'it'): 63,
          ('the', 'under'): 75,
          ('three', 'p'): 51,
          ('to', 'think'): 117,
          ('if', 'in'): 73,
          ('new', 'hampshire'): 147,
          ('national', 'and'): 103,
          ('of', 'country'): 143,
          ('ended', 'up'): 57,
          ('to', 'maintain'): 50,
          ('p', 'have'): 534,
          ('i', 'did'): 58,
          ('added', 'that'): 116,
          ('to', 'join'): 125,
          ('americans', 'have'): 69,
          ('the', 'outcome'): 54,
          ('we', 'a'): 375,
          ('be', 'or'): 83,
          ('are', 'out'): 71,
          ('w', 'bush'): 108,
          ('who', 'the'): 1029,
          ('that', 'such'): 56,
          ('said', 'will'): 67,
          ('companies', 'are'): 53,
          ('and', 'inequality'): 109,
          ('who', 'want'): 70,
          ('case', 'of'): 89,
          ('record', 'of'): 72,
          ('our', 'p'): 237,
          ('the', 'use'): 156,
          ('and', 'policy'): 101,
          ('the', 'massacre'): 50,
          ('the', 'i'): 823,
          ('also', 'been'): 51,
          ('like', 'this'): 103,
          ('in', 'november'): 99,
          ('say', 'they'): 190,
          ('way', 'of'): 151,
          ('just', 'of'): 159,
          ('also', 'that'): 222,
          ('up', 'with'): 205,
          ('out', 'he'): 52,
          ('her', 'was'): 128,
          ('the', 'situation'): 82,
          ('work', 'p'): 157,
          ('chief', 'of'): 182,
          ('that', "he's"): 58,
          ('to', 'violence'): 56,
          ('for', 'ms'): 55,
          ('is', 'not'): 980,
          ('here', 'to'): 61,
          ('make', 'to'): 85,
          ('2014', 'the'): 92,
          ('so', 'would'): 50,
          ('at', 'age'): 66,
          ('and', 'money'): 60,
          ('own', 'p'): 150,
          ('under', 'of'): 79,
          ('a', 'against'): 126,
          ('of', 'like'): 160,
          ('program', 'in'): 59,
          ('law', 'that'): 89,
          ('of', 'will'): 183,
          ('long', 'the'): 157,
          ('and', 'yet'): 115,
          ('most', 'is'): 56,
          ('cruz', 'and'): 56,
          ('not', 'the'): 1284,
          ('of', 'civil'): 68,
          ('ways', 'to'): 104,
          ('that', 'up'): 86,
          ('growth', 'the'): 74,
          ('you', 'do'): 112,
          ('than', 'they'): 100,
          ('the', 'line'): 167,
          ('general', 'the'): 74,
          ('the', 'vatican'): 75,
          ('and', 'students'): 55,
          ('even', 'for'): 75,
          ('lives', 'in'): 69,
          ('but', 'would'): 88,
          ('review', 'the'): 62,
          ('in', 'name'): 52,
          ('the', 'result'): 103,
          ('then', 'he'): 55,
          ('p', 'but'): 1648,
          ('which', 'been'): 67,
          ('and', 'p'): 2105,
          ('the', 'paris'): 53,
          ('of', 'say'): 67,
          ('by', 'to'): 276,
          ('p', 'course'): 68,
          ('about', 'said'): 59,
          ("that's", 'to'): 50,
          ('a', 'clinton'): 50,
          ('the', 'death'): 182,
          ('washington', 'on'): 86,
          ('they', 'can'): 172,
          ('had', 'taken'): 66,
          ('also', 'to'): 318,
          ('p', "here's"): 50,
          ('they', 'also'): 118,
          ('administration', 'the'): 109,
          ('head', 'of'): 105,
          ('americans', 'p'): 67,
          ('the', 'safety'): 113,
          ('now', 'he'): 54,
          ('his', 'had'): 116,
          ('the', 'he'): 1221,
          ('wrong', 'the'): 50,
          ('or', 'or'): 115,
          ('lives', 'the'): 74,
          ('s', 'to'): 50,
          ('in', 'north'): 92,
          ('and', 'found'): 88,
          ('c', 'c'): 61,
          ('said', 'when'): 90,
          ('and', 'president'): 122,
          ('tuesday', 'the'): 68,
          ('mr', 'also'): 124,
          ('need', 'to'): 600,
          ('he', 'can'): 95,
          ('i', 'can'): 102,
          ('said', 'were'): 177,
          ('p', 'washington'): 220,
          ('the', 'truth'): 113,
          ('very', 'different'): 56,
          ('referred', 'to'): 54,
          ('p', 'do'): 137,
          ('should', 'in'): 56,
          ('big', 'in'): 58,
          ('and', 'years'): 61,
          ('had', 'but'): 56,
          ('to', 'safety'): 69,
          ('far', 'than'): 74,
          ('can', 'make'): 51,
          ...})

In [37]:
poverty.apply_ngram_filter(lambda x,y: 'poverty' not in x+y)

In [40]:
poverty.apply_freq_filter(5)

In [41]:
poverty.ngram_fd


Out[41]:
FreqDist({('a', 'poverty'): 26,
          ('abject', 'poverty'): 5,
          ('about', 'poverty'): 22,
          ('above', 'poverty'): 5,
          ('address', 'poverty'): 8,
          ('against', 'poverty'): 7,
          ('americans', 'poverty'): 6,
          ('an', 'anti-poverty'): 7,
          ('an', 'poverty'): 5,
          ('and', 'poverty'): 65,
          ('anti-poverty', 'programs'): 8,
          ('antipoverty', 'programs'): 5,
          ('are', 'poverty'): 5,
          ('as', 'poverty'): 8,
          ('at', 'poverty'): 11,
          ('be', 'poverty'): 7,
          ('below', 'poverty'): 25,
          ('between', 'poverty'): 5,
          ('by', 'poverty'): 9,
          ('child', 'poverty'): 7,
          ('childhood', 'poverty'): 11,
          ('children', 'poverty'): 7,
          ('combating', 'poverty'): 6,
          ('concentrated', 'poverty'): 5,
          ('cycle', 'poverty'): 6,
          ('deep', 'poverty'): 5,
          ('dire', 'poverty'): 5,
          ('economic', 'poverty'): 5,
          ('end', 'poverty'): 9,
          ('escaping', 'poverty'): 8,
          ('evicted', 'poverty'): 7,
          ('extreme', 'poverty'): 46,
          ('federal', 'poverty'): 13,
          ('fight', 'poverty'): 7,
          ('fighting', 'poverty'): 10,
          ('for', 'poverty'): 12,
          ('from', 'poverty'): 21,
          ('global', 'poverty'): 14,
          ('has', 'poverty'): 6,
          ('have', 'poverty'): 7,
          ('high', 'poverty'): 5,
          ('in', 'poverty'): 100,
          ('income', 'poverty'): 6,
          ('inequality', 'poverty'): 16,
          ('into', 'poverty'): 19,
          ('is', 'poverty'): 8,
          ('issues', 'poverty'): 6,
          ('its', 'poverty'): 8,
          ('level', 'poverty'): 6,
          ('like', 'poverty'): 7,
          ('live', 'poverty'): 23,
          ('lives', 'poverty'): 8,
          ('living', 'poverty'): 24,
          ('more', 'poverty'): 10,
          ('not', 'poverty'): 6,
          ('odds', 'poverty'): 8,
          ('of', 'antipoverty'): 5,
          ('of', 'poverty'): 212,
          ('official', 'poverty'): 5,
          ('on', 'poverty'): 45,
          ('or', 'poverty'): 8,
          ('out', 'poverty'): 53,
          ('p', 'poverty'): 21,
          ('people', 'poverty'): 30,
          ('poverty', '2030'): 8,
          ('poverty', 'a'): 43,
          ('poverty', 'about'): 5,
          ('poverty', 'according'): 5,
          ('poverty', 'also'): 6,
          ('poverty', 'america'): 7,
          ('poverty', 'among'): 9,
          ('poverty', 'an'): 6,
          ('poverty', 'and'): 198,
          ('poverty', 'are'): 14,
          ('poverty', 'around'): 5,
          ('poverty', 'as'): 17,
          ('poverty', 'at'): 14,
          ('poverty', 'be'): 11,
          ('poverty', 'because'): 5,
          ('poverty', 'but'): 23,
          ('poverty', 'by'): 22,
          ('poverty', 'center'): 12,
          ('poverty', 'climate'): 5,
          ('poverty', 'could'): 5,
          ('poverty', 'crime'): 7,
          ('poverty', 'desmond'): 5,
          ('poverty', 'disease'): 5,
          ('poverty', 'for'): 17,
          ('poverty', 'francis'): 5,
          ('poverty', 'from'): 11,
          ('poverty', 'has'): 20,
          ('poverty', 'have'): 9,
          ('poverty', 'he'): 20,
          ('poverty', 'health'): 7,
          ('poverty', 'hunger'): 6,
          ('poverty', 'i'): 8,
          ('poverty', 'in'): 72,
          ('poverty', 'income'): 9,
          ('poverty', 'inequality'): 32,
          ('poverty', 'into'): 5,
          ('poverty', 'is'): 49,
          ('poverty', 'issues'): 6,
          ('poverty', 'it'): 17,
          ('poverty', "it's"): 6,
          ('poverty', 'just'): 7,
          ('poverty', 'law'): 12,
          ('poverty', 'level'): 8,
          ('poverty', 'levels'): 5,
          ('poverty', 'line'): 42,
          ('poverty', 'more'): 13,
          ('poverty', 'mr'): 17,
          ('poverty', 'not'): 15,
          ('poverty', 'now'): 7,
          ('poverty', 'of'): 23,
          ('poverty', 'on'): 10,
          ('poverty', 'one'): 8,
          ('poverty', 'or'): 27,
          ('poverty', 'other'): 5,
          ('poverty', 'over'): 5,
          ('poverty', 'p'): 81,
          ('poverty', 'percent'): 6,
          ('poverty', 'poverty'): 6,
          ('poverty', 'profit'): 7,
          ('poverty', 'project'): 8,
          ('poverty', 'rate'): 25,
          ('poverty', 'rates'): 10,
          ('poverty', 'reduction'): 9,
          ('poverty', 'said'): 27,
          ('poverty', 'seem'): 5,
          ('poverty', 'so'): 7,
          ('poverty', 'still'): 5,
          ('poverty', 'than'): 15,
          ('poverty', 'that'): 27,
          ('poverty', 'the'): 130,
          ('poverty', 'their'): 5,
          ('poverty', 'they'): 10,
          ('poverty', 'this'): 20,
          ('poverty', 'through'): 5,
          ('poverty', 'to'): 38,
          ('poverty', 'united'): 5,
          ('poverty', 'violence'): 5,
          ('poverty', 'was'): 13,
          ('poverty', 'we'): 10,
          ('poverty', 'welfare'): 5,
          ('poverty', 'who'): 10,
          ('poverty', 'will'): 6,
          ('poverty', 'with'): 8,
          ('poverty', 'worldwide'): 6,
          ('poverty', 'would'): 10,
          ('progress', 'poverty'): 6,
          ('reduce', 'poverty'): 10,
          ('reducing', 'poverty'): 7,
          ('rise', 'poverty'): 5,
          ('solution', 'poverty'): 5,
          ('southern', 'poverty'): 12,
          ('that', 'poverty'): 15,
          ('the', 'antipoverty'): 8,
          ('the', 'poverty'): 184,
          ('their', 'poverty'): 11,
          ('to', 'poverty'): 85,
          ('unemployment', 'poverty'): 7,
          ('united', 'poverty'): 5,
          ('urban', 'poverty'): 5,
          ('war', 'poverty'): 16,
          ('was', 'poverty'): 7,
          ('way', 'poverty'): 6,
          ('will', 'poverty'): 5,
          ('with', 'poverty'): 16})

In [49]:
class_colls = copy.copy(collocates)
class_colls.apply_ngram_filter(lambda x,y: x != 'class' and y!='class')

In [51]:
class_colls.apply_freq_filter(5)

In [54]:
class_colls.ngram_fd)


Out[54]:
FreqDist({('a', 'class'): 40,
          ('about', 'class'): 10,
          ('american', 'class'): 8,
          ('and', 'class'): 39,
          ('as', 'class'): 14,
          ('billionaire', 'class'): 7,
          ('by', 'class'): 8,
          ('class', 'a'): 29,
          ('class', 'about'): 6,
          ('class', 'and'): 71,
          ('class', 'are'): 12,
          ('class', 'as'): 12,
          ('class', 'at'): 7,
          ('class', 'be'): 7,
          ('class', 'but'): 21,
          ('class', 'by'): 6,
          ('class', 'even'): 6,
          ('class', 'from'): 7,
          ('class', 'had'): 5,
          ('class', 'has'): 9,
          ('class', 'have'): 5,
          ('class', 'he'): 11,
          ('class', 'i'): 10,
          ('class', 'if'): 5,
          ('class', 'in'): 35,
          ('class', 'inequality'): 6,
          ('class', 'is'): 24,
          ('class', 'it'): 10,
          ('class', "it's"): 9,
          ('class', 'like'): 8,
          ('class', 'more'): 9,
          ('class', 'mr'): 12,
          ('class', 'not'): 7,
          ('class', 'of'): 25,
          ('class', 'on'): 9,
          ('class', 'or'): 7,
          ('class', 'p'): 67,
          ('class', 'poor'): 6,
          ('class', 'said'): 11,
          ('class', 'than'): 8,
          ('class', 'that'): 26,
          ('class', 'the'): 86,
          ('class', 'their'): 5,
          ('class', 'they'): 5,
          ('class', 'this'): 7,
          ('class', 'to'): 22,
          ('class', 'war'): 5,
          ('class', 'warfare'): 6,
          ('class', 'was'): 8,
          ('class', 'well'): 5,
          ('class', 'when'): 7,
          ('class', 'who'): 9,
          ('class', 'with'): 10,
          ('first', 'class'): 6,
          ('for', 'class'): 19,
          ('from', 'class'): 5,
          ('his', 'class'): 5,
          ('in', 'class'): 23,
          ('into', 'class'): 11,
          ('is', 'class'): 5,
          ('lower', 'class'): 5,
          ('middle', 'class'): 203,
          ('new', 'class'): 7,
          ('of', 'class'): 60,
          ('on', 'class'): 15,
          ('or', 'class'): 13,
          ('p', 'class'): 9,
          ('political', 'class'): 9,
          ('poor', 'class'): 8,
          ('race', 'class'): 8,
          ('social', 'class'): 8,
          ('that', 'class'): 9,
          ('the', 'class'): 215,
          ('to', 'class'): 18,
          ('upper', 'class'): 6,
          ('white', 'class'): 6,
          ('with', 'class'): 6,
          ('working', 'class'): 41})

In [59]:
class_colls_df = pd.DataFrame([(' - '.join(k), v) for k,v in class_colls.ngram_fd.items()], columns=('collocate pair','freq'))

In [64]:
class_colls_df.sort('freq', ascending=False)


/Users/Matt/anaconda3/lib/python3.4/site-packages/ipykernel/__main__.py:1: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
  if __name__ == '__main__':
Out[64]:
collocate pair freq
4 the - class 215
51 middle - class 203
11 class - the 86
45 class - and 71
72 class - p 67
59 of - class 60
47 working - class 41
75 a - class 40
26 and - class 39
57 class - in 35
6 class - a 29
3 class - that 26
77 class - of 25
28 class - is 24
9 in - class 23
13 class - to 22
49 class - but 21
66 for - class 19
41 to - class 18
31 on - class 15
54 as - class 14
1 or - class 13
32 class - are 12
30 class - as 12
58 class - mr 12
61 class - he 11
15 class - said 11
27 into - class 11
63 class - with 10
64 class - it 10
... ... ...
60 class - at 7
16 class - from 7
38 class - not 7
56 new - class 7
37 class - when 7
25 class - be 7
29 class - this 7
2 billionaire - class 7
76 class - or 7
69 class - inequality 6
71 white - class 6
73 class - even 6
18 class - about 6
33 class - warfare 6
42 first - class 6
10 with - class 6
70 upper - class 6
40 class - by 6
68 class - poor 6
5 class - if 5
74 from - class 5
19 class - had 5
65 is - class 5
20 lower - class 5
24 class - their 5
48 class - they 5
34 class - have 5
35 class - well 5
36 class - war 5
0 his - class 5

78 rows × 2 columns


In [67]:
def get_collocates(word, collocates, min_freq=5):
    wcolls = copy.copy(collocates)
    wcolls.apply_ngram_filter(lambda x,y: word!=x and word!=y)
    wcolls.apply_freq_filter(min_freq)
    wcolls_df = pd.DataFrame([(' - '.join(k), v) for k,v in wcolls.ngram_fd.items()], columns=('collocate pair','freq'))
    return wcolls_df.sort('freq',ascending=False)

In [71]:
get_collocates('inequality',collocates)


/Users/Matt/anaconda3/lib/python3.4/site-packages/ipykernel/__main__.py:6: FutureWarning: sort(columns=....) is deprecated, use sort_values(by=.....)
Out[71]:
collocate pair freq
97 income - inequality 273
46 of - inequality 207
132 inequality - the 182
152 inequality - and 159
85 inequality - p 137
144 and - inequality 109
89 the - inequality 107
161 inequality - in 104
48 to - inequality 99
20 inequality - is 84
113 inequality - a 73
45 on - inequality 72
86 economic - inequality 68
146 about - inequality 67
1 that - inequality 65
51 inequality - to 64
23 in - inequality 47
81 inequality - has 45
69 inequality - of 43
96 inequality - it 43
56 inequality - that 42
84 p - inequality 41
155 rising - inequality 37
31 poverty - inequality 32
135 inequality - but 30
118 inequality - as 30
65 reduce - inequality 27
170 growing - inequality 26
74 inequality - for 21
60 inequality - not 21
... ... ...
3 inequality - had 5
172 who - inequality 5
115 level - inequality 5
9 inequality - recent 5
112 inequality - there 5
38 inequality - even 5
61 inequality - other 5
63 combat - inequality 5
37 inequality - its 5
67 problem - inequality 5
70 was - inequality 5
83 crisis - inequality 5
30 inequality - individuals 5
87 inequality - public 5
22 inequality - largely 5
111 roots - inequality 5
18 inequality - poor 5
143 inequality - up 5
120 inequality - inevitable 5
123 inequality - most 5
124 increased - inequality 5
125 inequality - years 5
128 inequality - income 5
130 inequality - some 5
133 inequality - those 5
134 inequality - among 5
14 inequality - about 5
136 inequality - you 5
141 inevitable - inequality 5
90 talk - inequality 5

181 rows × 2 columns


In [ ]: