# Instructions:

1. Compute the most important keywords (a keyword may be a phrase of 1-3 words).
2. Choose the top n keywords from the previously generated list. Compare these keywords with all the words occurring in all of the transcripts.
3. Generate a score (rank) for each of these top n keywords based on the analysed transcripts.

``````

In [51]:

from sklearn.feature_extraction import stop_words
from nltk.corpus import stopwords

``````
``````

In [52]:

import math
from textblob import TextBlob as tb

``````
``````

In [2]:

# Read the whole reference script into memory as the string `data`, which the
# parsing, TF-IDF and RAKE cells consume.
# NOTE(review): the body of this with-statement was missing from the exported
# cell; reading the file into `data` is inferred from those later uses.
with open("scripts/script.txt", "r") as f:
    data = f.read()

#with open("scripts/script.txt", "r") as f:

#for line in data:
#    words = data.split()

``````
``````

In [3]:

# Load the three transcripts as plain text.  The file handles get their own
# names so that t1, t2 and t3 end up bound to the file contents, which is
# what the TextBlob cell passes to tb().
# NOTE(review): the bodies of these with-statements were missing from the
# exported cell; reading each file into a string is inferred from that use.
with open("scripts/transcript_1.txt", "r") as fh1:
    t1 = fh1.read()

with open("scripts/transcript_2.txt", "r") as fh2:
    t2 = fh2.read()

with open("scripts/transcript_3.txt", "r") as fh3:
    t3 = fh3.read()

``````

# PRE-PROCESSING!

``````

In [4]:

from spacy.en import English
import nltk
from sklearn.feature_extraction.stop_words import ENGLISH_STOP_WORDS

``````
``````

In [7]:

# Instantiate the spaCy English parser imported above and run it over `data`;
# the resulting parsedData is iterated token by token further down.
parser = English()
parsedData = parser(data)

``````
``````

In [13]:

``````
``````

Out[13]:

Food is any substance consumed to provide nutritional support for

``````
``````

In [18]:

# Show a handful of per-token attributes for the first two tokens of the
# parsed document.
# Each token is an object with many properties: an attribute ending in an
# underscore returns the string representation, while the same attribute
# without the underscore returns an index (int) into spaCy's vocabulary.
# The probability estimate is based on counts from a 3 billion word
# corpus, smoothed using the Simple Good-Turing method.
for token in parsedData[0:2]:
    print("original:", token.orth, token.orth_)
    print("lowercased:", token.lower, token.lower_)
    print("lemma:", token.lemma, token.lemma_)
    print("shape:", token.shape, token.shape_)
    print("prefix:", token.prefix, token.prefix_)
    print("suffix:", token.suffix, token.suffix_)
    print("log probability:", token.prob)
    print("Brown cluster id:", token.cluster)
    print("----------------------------------------")

``````
``````

original: 6832 Food
lowercased: 960 food
lemma: 960 food
shape: 354724 Xxxx
prefix: 3792 F
suffix: 155057 ood
log probability: -11.701742172241211
Brown cluster id: 838
----------------------------------------
original: 474 is
lowercased: 474 is
lemma: 488 be
shape: 21581 xx
prefix: 570 i
suffix: 474 is
log probability: -4.457748889923096
Brown cluster id: 762
----------------------------------------

``````

# TF-IDF

``````

In [ ]:

from sklearn.metrics import accuracy_score
from sklearn.feature_extraction.text import TfidfVectorizer, CountVectorizer

def tf(word, blob):
    """Return the term frequency of ``word`` in ``blob``.

    The result is the number of occurrences of ``word`` in ``blob.words``
    divided by the total number of words.  A blob with no words yields
    ``0.0`` instead of raising ``ZeroDivisionError``.
    """
    words = blob.words
    total = len(words)
    if total == 0:
        # Nothing to count against; define the frequency as zero.
        return 0.0
    return words.count(word) / total

def n_containing(word, bloblist):
    """Count the blobs in ``bloblist`` whose ``words`` contain ``word``."""
    hits = 0
    for document in bloblist:
        if word in document.words:
            hits += 1
    return hits

def idf(word, bloblist):
    """Return the inverse document frequency of ``word`` over ``bloblist``.

    Computed as the natural logarithm of the number of blobs divided by one
    plus the number of blobs that contain ``word``.
    """
    total_docs = len(bloblist)
    docs_with_word = n_containing(word, bloblist)
    return math.log(total_docs / (1 + docs_with_word))

def tfidf(word, blob, bloblist):
    """Return the TF-IDF weight of ``word`` for ``blob`` within ``bloblist``.

    This is the product of ``tf(word, blob)`` and ``idf(word, bloblist)``.
    """
    term_frequency = tf(word, blob)
    inverse_doc_frequency = idf(word, bloblist)
    return term_frequency * inverse_doc_frequency

``````
``````

In [ ]:

# Wrap the script and the three transcripts in TextBlob objects.  The list is
# built directly instead of calling append() for its side effect inside a
# throwaway list comprehension.
bloblist = [tb(doc) for doc in (data, t1, t2, t3)]

# For every document, score each distinct word with TF-IDF and print the
# three highest-scoring words.
for i, blob in enumerate(bloblist):
    print("Top words in document {}".format(i + 1))
    # dict.fromkeys() de-duplicates while keeping first-occurrence order, so
    # each distinct word is scored once and tied scores sort as before.
    distinct_words = dict.fromkeys(blob.words)
    scores = {word: tfidf(word, blob, bloblist) for word in distinct_words}
    sorted_words = sorted(scores.items(), key=lambda x: x[1], reverse=True)
    for word, score in sorted_words[:3]:
        print("Word: {}, TF-IDF: {}".format(word, round(score, 5)))

``````
``````

In [ ]:

# NOTE(review): the vectorizer is built and immediately discarded, and `data`
# is passed to the constructor rather than to a fit/transform call —
# presumably unintended; confirm against the CountVectorizer signature.
CountVectorizer(data)

``````
``````

In [ ]:

# Configure a word-level TF-IDF vectorizer over 1- to 3-word n-grams using
# the 'english' stop-word setting.
# NOTE(review): this rebinds the name `tf`, which an earlier cell defines as
# the term-frequency function called by tfidf(); after this assignment that
# function is no longer reachable under the name `tf`.
tf = TfidfVectorizer(analyzer='word', ngram_range=(1,3), min_df = 0, stop_words = 'english')

``````
``````

In [ ]:

# Fit the vectorizer and build the TF-IDF matrix in one step.
# NOTE(review): `data2` is not defined in any visible cell — confirm where it
# comes from (presumably a collection of documents).
tfidf_matrix = tf.fit_transform(data2)

``````
``````

In [ ]:

# Feature names from the fitted vectorizer; looked up by feature index when
# the top phrases are printed.
feature_names = tf.get_feature_names()

``````
``````

In [ ]:

# Display the matrix dimensions next to the number of feature names.
tfidf_matrix.shape, len(feature_names)

``````
``````

In [ ]:

# Convert the matrix to dense form, then take its first row as a flat Python
# list of scores.
dense = tfidf_matrix.todense()
episode = dense[0].tolist()[0]

``````
``````

In [ ]:

# Keep (feature index, score) pairs only for features with a positive score.
phrase_scores = [(idx, weight) for idx, weight in enumerate(episode) if weight > 0]

``````
``````

In [ ]:

# Order the (feature index, score) pairs from highest to lowest score and
# print the 20 best phrases alongside their scores.
sorted_phrase_scores = sorted(phrase_scores, key=lambda pair: -pair[1])
for word_id, score in sorted_phrase_scores[:20]:
    phrase = feature_names[word_id]
    print('{0: <20} {1}'.format(phrase, score))

``````
``````

In [ ]:

def freq(word, tokens):
    """Return the number of times ``word`` occurs in ``tokens``.

    NOTE(review): the body was missing from the exported cell; counting
    occurrences is inferred from the call site, which stores the result
    under the ``'freq'`` key for each token — confirm.
    """
    return tokens.count(word)

# Compute the frequency for each term (unigrams, bigrams and trigrams) of
# every tip and collect the counts per tip text in `docs`.
# NOTE(review): `venue`, `tokenizer`, `bigrams` and `trigrams` are not
# defined in any visible cell, and `stopwords` is imported at the top as the
# NLTK corpus object, so the membership tests presumably need an actual
# collection of stop words — confirm before running.
vocabulary = []
docs = {}
all_tips = []
for tip in venue.tips():
    tokens = tokenizer.tokenize(tip.text)

    # The n-grams are requested from the raw tokens, before the unigram list
    # is lower-cased and filtered.
    bi_tokens = bigrams(tokens)
    tri_tokens = trigrams(tokens)
    tokens = [token.lower() for token in tokens if len(token) > 2]
    tokens = [token for token in tokens if token not in stopwords]

    bi_tokens = [' '.join(token).lower() for token in bi_tokens]
    bi_tokens = [token for token in bi_tokens if token not in stopwords]

    tri_tokens = [' '.join(token).lower() for token in tri_tokens]
    tri_tokens = [token for token in tri_tokens if token not in stopwords]

    final_tokens = []
    final_tokens.extend(tokens)
    final_tokens.extend(bi_tokens)
    final_tokens.extend(tri_tokens)
    docs[tip.text] = {'freq': {}}

    for token in final_tokens:
        docs[tip.text]['freq'][token] = freq(token, final_tokens)

# print() as a function call: the bare `print docs` statement is Python 2
# syntax and does not parse under Python 3, which the other cells use.
print(docs)

``````

## RAKE

``````

In [20]:

from rake_nltk import Rake

r = Rake() # Uses stopwords for English from NLTK, and all punctuation characters.

``````
``````

In [24]:

# To provide your own set of stop words and punctuation to ignore, construct
# Rake with them instead:
# r = Rake(<list of stopwords>, <string of punctuations to ignore>)

# Run keyword extraction over `data`.
r.extract_keywords_from_text(data)

``````
``````

In [23]:

r.get_ranked_phrases_with_scores() # Keyword phrases with their scores, ranked highest to lowest.

``````
``````

Out[23]:

[(35.5, 'supermarket giants wield great purchasing power'),
(35.416666666666664,
'support programs allowed major grain exporting countries'),
(33.923809523809524,
'western kitchens use variable temperature convection ovens'),
(33.09701492537313, 'wikimedia commons food timeline wikibooks cookbook'),
(31.798245614035086,
'common agricultural policy ), national government policy'),
(30.097014925373134, 'food market following george w bush'),
(29.583333333333332,
'2013 overseas development institute researchers showed'),
(29.554347826086957, 'may help remove potentially harmful chemicals'),
(28.9, 'digestive complaints usually develop within half'),
(27.668443496801704, 'taste another universal phenomenon regarding food'),
(25.69316877152698, 'raw food preparation certain cultures highlight animal'),
(24.378040566398774, 'dried foods also promote food safety'),
(24.25, 'growing trend toward sustainable agricultural practices'),
(24.234696084793423, 'evidence suggests commercial food preparation may'),
(22.330348258706465, 'decrease cancer risk since many food'),
(21.75, 'asia see rural wages rise'),
(21.464912280701753, 'onion family ), leaf vegetables'),
(21.15, 'complicated process involving many producers'),
(21.066666666666666, 'investment banks like barclays capital'),
(20.763681592039802, 'consumers worldwide faced rising food prices'),
(20.75, 'brick oven containing burning wood'),
(20.530348258706468, 'international food processing giants controlling'),
(20.458333333333332, 'roughly 7 million people die'),
(20.307692307692307, 'also support vital ecosystem services'),
(20.247014925373136, 'food include heterocyclic amines generated'),
(20.166666666666664, '000 children die per day'),
(20.15, 'conscious diets many individuals limit'),
(20.0, 'restaurants restaurants employ trained chefs'),
(19.864285714285714, 'plant sources include various edible fungi'),
(19.816666666666666, 'common examples include adding granola'),
(19.63157894736842, 'including canola oil ), sesame'),
(19.54, 'pickled foods like leavened bread'),
(19.513681592039802, '37 countries faced food crises'),
(19.429347826086957, 'people may find delicious ...'),
(18.93768115942029, 'fungal growth may contain mycotoxins'),
(18.79137529137529, 'sushi often incorporates raw fish'),
(18.61130063965885, 'greatly affected world food trade'),
(18.5, 'typically early grocery shops would'),
(18.147014925373135, 'food production include international organizations'),
(18.08034825870647, 'ensuring food safety include maintaining'),
(18.0, 'american style outdoor grill fueled'),
(17.865384615384613, 'cooking traditions using various spices'),
(17.362040133779264, '17th century europe may also'),
(17.097014925373134, 'retailing food marketing brings together'),
(16.830348258706465, 'commercial trade international food imports'),
(16.25, 'shopping using shopping carts'),
(16.0, 'homo erectus campsites dating'),
(15.884848484848483, 'specific dietary aspects influence health'),
(15.5, '260 ° c ))'),
(15.458333333333332, 'approximately 815 million people'),
(15.297014925373134, 'based food usually involves slaughter'),
(15.075324675324676, 'diet deficiencies dietary habits play'),
(15.055555555555555, 'process animals en masse'),
(14.916666666666666, '100 countries became signatories'),
(14.893939393939393, 'sensation often considered unpleasant characterized'),
(14.766666666666666, 'international water management institute'),
(14.75, 'measured using famine scales'),
(14.6, 'cause perhaps around 35'),
(14.464912280701755, 'lettuce ), stem vegetables'),
(14.430348258706468, 'national food processing companies'),
(14.304347826086957, 'may help prevent cancer'),
(14.3, 'allergens frequently produce symptoms'),
(14.271578947368422, 'athletes ), functional foods'),
(14.166666666666666, 'powdered milk manufacturing industry'),
(14.112040133779264, 'may also involve mixing'),
(14.0, '2015 digital life design'),
(13.733333333333334, 'many cultures use grills'),
(13.721428571428572, 'cuisine many scholars claim'),
(13.666666666666666, 'highly acidic citric juice'),
(13.631578947368421, 'et cetera ), legumes'),
(13.597014925373134, 'food preparation takes place'),
(13.583333333333334, 'highest increase since 1990'),
(13.571428571428571, 'classic italian cuisine includes'),
(13.566666666666666, 'edible seeds include cereals'),
(13.5, 'mit media lab ’'),
(13.5, 'current trend towards environmentalism'),
(13.36239954075775, 'food include animal feed'),
(13.290000000000001, 'ethnic foods include italian'),
(13.23157894736842, 'market ), plants prior'),
(13.228593872741556, 'fast food ), 6'),
(13.218181818181819, 'contemporary human dietary habits'),
(13.205555555555556, 'animals include milk produced'),
(13.197014925373134, 'international food information council'),
(13.13076923076923, 'thinly sliced raw beef'),
(13.097014925373134, 'surplus food took place'),
(12.860696517412936, 'food aid food aid'),
(12.833333333333332, 'least 3 million children'),
(12.804347826086957, 'however fish butchery may'),
(12.804347826086957, 'butcher may commonly break'),
(12.666666666666666, 'commodities futures modernization act'),
(12.65, 'dry methods include sautéing'),
(12.623333333333333, 'sour foods include citrus'),
(12.597014925373134, 'food prices rose 4'),
(12.577967306325515, 'modern international food industry'),
(12.483333333333334, 'including many healthful fats'),
(12.382729211087419, 'putting global food reserves'),
(12.333333333333334, 'vitamin c deficiency results'),
(12.25, '8 ° c ).'),
(12.20136275146009, 'food production may change'),
(12.166666666666668, 'salt promotes water excretion'),
(12.083333333333334, 'killing bacteria using heat'),
(12.023626373626373, 'sugar include raw sugar'),
(11.930348258706468, 'early food processing techniques'),
(11.884848484848485, 'religious diets dietary habits'),
(11.865384615384615, 'cooking requires applying heat'),
(11.791666666666666, 'different types kept separate'),
(11.764912280701754, 'low blood pressure ),'),
(11.737014925373135, 'food manufacturing packaged foods'),
(11.666666666666666, 'including higher oil prices'),
(11.597014925373134, 'food industry influences nutrition'),
(11.407142857142858, 'health issues human diet'),
(11.247014925373135, 'many food especially fruit'),
(10.68272921108742, 'health food movement known'),
(10.554347826086957, 'preparation may help'),
(10.166666666666666, 'harmful foreign agent'),
(10.097014925373134, 'sustainable food sydney'),
(10.097014925373134, 'common food allergens'),
(10.047619047619047, 'world resources institute'),
(10.0, 'extremely sensitive individuals'),
(9.804347826086957, 'may involve washing'),
(9.75, 'see prices spiking'),
(9.64102564102564, 'also provide services'),
(9.631578947368421, 'icescr ), recognizing'),
(9.631578947368421, 'et cetera ),'),
(9.631578947368421, 'carrots ), bulbs'),
(9.631578947368421, '3 eggs ),'),
(9.597014925373134, 'food processing industry'),
(9.583333333333334, 'provide nutritional support'),
(9.564102564102564, 'raw sliced fish'),
(9.557692307692307, 'also define cultures'),
(9.5, 'mit media lab'),
(9.5, 'increasing trend towards'),
(9.5, 'employ domestic servants'),
(9.5, '500 ° f'),
(9.5, '118 ° f'),
(9.483333333333334, 'include root vegetables'),
(9.457692307692307, 'many also undergo'),
(9.430348258706468, 'food aid provisions'),
(9.430348258706468, 'food aid dependence'),
(9.416666666666666, '000 years ago'),
(9.4, 'many cultures hold'),
(9.333333333333334, 'safety foodborne illness'),
(9.333333333333334, 'lowest since 1983'),
(9.333333333333334, 'go well together'),
(9.297014925373134, 'rising food prices'),
(9.297014925373134, 'food commodities rising'),
(9.25, 'various cultures throughout'),
(9.23076923076923, 'raw foodism promotes'),
(9.222014925373134, 'people secured food'),
(9.216666666666667, 'grain production worldwide'),
(9.15, 'eliminate many risks'),
(9.15, 'differences include preferences'),
(9.097014925373134, 'single food product'),
(9.097014925373134, 'locally grown food'),
(9.097014925373134, 'genetically modified food'),
(9.097014925373134, 'food poisoning ",'),
(9.097014925373134, 'food marketing system'),
(9.097014925373134, 'food manufacturing arose'),
(9.097014925373134, 'appealing food presentations'),
(9.097014925373134, 'almost every food'),
(9.0, 'un special rapporteur'),
(9.0, 'pork jumped 58'),
(9.0, 'olivier de schutter'),
(9.0, 'mostly hollow devices'),
(9.0, 'larger percentages going'),
(9.0, 'kevin slavin hosts'),
(9.0, 'justus von liebig'),
(9.0, 'induce similar reactions'),
(9.0, 'includes military bases'),
(9.0, 'immune system mistakes'),
(9.0, 'fleur de sel'),
(9.0, 'eventually bring prices'),
(9.0, 'entrepreneur emilie baltz'),
(9.0, 'delicate electrolyte balance'),
(9.0, 'cylindrical clay oven'),
(9.0, 'codex alimentarius commission'),
(9.0, 'bacterial foodborne illness'),
(9.0, '126bn (£ 41bn'),
(9.0, '10th millennium bc'),
(8.930348258706468, 'food contains water'),
(8.9, 'expended energy results'),
(8.9, 'development include changes'),
(8.897014925373135, 'discouraging food production'),
(8.880952380952381, '000 plant species'),
(8.857142857142858, 'mostly vegan diet'),
(8.857142857142858, 'ethnically diverse diet'),
(8.833333333333334, 'seen rapid growth'),
(8.833333333333334, 'dangerous bacterial growth'),
(8.833333333333334, 'bottled water manufacturing'),
(8.81130063965885, 'world food programme'),
(8.8, 'bees produce honey'),
(8.8, 'apples contain cyanide'),
(8.75, 'sustainable urban farming'),
(8.75, 'past five years'),
(8.714285714285715, 'stevia plant contains'),
(8.714285714285715, 'modern sanitation standards'),
(8.666666666666666, 'physical thing –'),
(8.666666666666666, 'humane slaughter act'),
(8.666666666666666, 'especially monosodium glutamate'),
(8.6636815920398, 'food energy worldwide'),
(8.64, 'refrigerating foods promptly'),
(8.633333333333333, 'improper temperature control'),
(8.631578947368421, 'mollusks ), peanuts'),
(8.625, 'benefit people suffering'),
(8.615384615384615, 'heat cooking method'),
(8.597014925373134, 'lower food reserves'),
(8.583333333333334, 'local growing season'),
(8.571428571428571, 'sugar confectionery manufacturing'),
(8.555555555555555, 'animals lay eggs'),
(8.533333333333333, 'overall rising demand'),
(8.5, 'purely speculative logic'),
(8.5, 'particularly susceptible individual'),
(8.5, 'lesser degree oranges'),
(8.5, 'jawaharlal nehru university'),
(8.5, 'direct product taken'),
(8.5, 'de schutter told'),
(8.5, 'clean preparation area'),
(8.5, 'childhood disorders caused'),
(8.5, 'alkali metal ions'),
(8.497014925373135, 'attractive food source'),
(8.466666666666667, 'contain low concentrations'),
(8.458333333333334, 'use different types'),
(8.430348258706468, 'improve food safety'),
(8.430348258706468, 'food products produced'),
(8.428571428571429, 'hold large surpluses'),
(8.416666666666666, 'limited service restaurants'),
(8.393939393939394, 'however often difficult'),
(8.382729211087419, 'known food brands'),
(8.35, 'significant international problem'),
(8.35, 'plants many plants'),
(8.347014925373134, 'countries list food'),
(8.333333333333334, 'wiktionary media related'),
(8.333333333333334, 'single high temperature'),
(8.333333333333334, 'several distinct cultivars'),
(8.333333333333334, 'products respond increasingly'),
(8.333333333333334, 'local village marketplace'),
(8.333333333333334, 'including sea salt'),
(8.333333333333334, 'human epidemiological analysis'),
(8.333333333333334, 'african coastal cities'),
(8.304347826086957, 'fruits may excrete'),
(8.3, 'critical control points'),
(8.282051282051281, 'dry cooking method'),
(8.275, 'many different types'),
(8.264285714285714, 'world health organization'),
(8.2636815920398, 'worldwide food exports'),
(8.25, 'soaked wood chips'),
(8.25, 'otherwise smooth dish'),
(8.25, 'last five years'),
(8.2, 'butcher preparing meat'),
(8.180769230769231, 'raw meat dish'),
(8.166666666666666, 'varieties – account'),
(8.166666666666666, 'term monetary gain'),
(8.1636815920398, 'food energy required'),
(8.133333333333333, 'produce negative impacts'),
(8.133333333333333, 'others use blood'),
(8.125, 'five different types'),
(8.1, 'based control systems'),
(8.097014925373134, 'unlike food processors'),
(8.097014925373134, 'top food importer'),
(8.097014925373134, 'change food manufacture'),
(8.097014925373134, 'certain food protein'),
(8.066666666666666, 'many countries regulate'),
(8.05, 'various health problems'),
(8.05, 'cultures consume blood'),
(8.047619047619047, 'world war ii'),
(8.045732874091083, 'animal food products'),
(8.0, 'wealthier asian consumers'),
(8.0, 'unsweetened dark chocolate'),
(8.0, 'smaller manageable cuts'),
(8.0, 'rises present opportunities'),
(8.0, 'require certain types'),
(8.0, 'reduce subsidies paid'),
(8.0, 'provoke lethal reactions'),
(8.0, 'promotes thyroid function'),
(8.0, 'professor caleb harper'),
(8.0, 'presentation aesthetically pleasing'),
(8.0, 'mainly involved salting'),
(8.0, 'heat vessels placed'),
(8.0, 'especially well illustrated'),
(8.0, 'especially saturated fats'),
(8.0, 'depressing crop prices'),
(8.0, 'contains essential nutrients'),
(8.0, 'china alone accounts'),
(8.0, 'agriculture organization projects'),
(7.964285714285714, 'world wheat prices'),
(7.9443478260869576, 'prepared foods may'),
(7.933333333333334, 'new financial investors'),
(7.930348258706468, 'available food preservation'),
(7.930348258706468, 'agroecosystems provide food'),
(7.928571428571429, 'potential large benefits'),
(7.9, 'usually done outside'),
(7.881578947368421, 'maize ), wheat'),
(7.85, 'social unrest around'),
(7.847014925373134, 'food either directly'),
(7.833333333333334, 'united states spent'),
(7.833333333333334, 'reduced staffing costs'),
(7.815384615384615, 'larger animal meat'),
(7.811300639658849, 'modern food storage'),
(7.8, 'produce rich oils'),
(7.7970149253731345, 'united nations food'),
(7.792857142857143, 'western popular culture'),
(7.763681592039802, 'food preservation methods'),
(7.7636815920398, 'food substance required'),
(7.733333333333334, 'disrupting local markets'),
(7.733333333333333, 'western african farmers'),
(7.728571428571429, 'large companies control'),
(7.714285714285714, 'world bank reported'),
(7.7, 'caused price fluctuations'),
(7.666666666666667, 'texture texture plays'),
(7.666666666666666, 'foreign bodies ")'),
(7.642857142857142, 'indian culture uses'),
(7.63076923076923, 'ground raw beef'),
(7.615384615384615, 'cooking certain proteins'),
(7.6, 'southeast asian markets'),
(7.597014925373134, 'food supply chains'),
(7.583333333333334, 'growing consumer demand'),
(7.571428571428571, 'japanese cuisine consists'),
(7.533333333333333, 'price since 2000'),
(7.5, 'human right derived'),
(7.5, 'gone rancid due'),
(7.5, 'ever increasing population'),
(7.5, 'britain remained dependent'),
(7.5, 'almost always caused'),
(7.430348258706468, 'offer quality food'),
(7.333333333333334, 'new mass markets'),
(7.333333333333334, 'enhance product quality'),
(7.333333333333334, 'eat pure salt'),
(7.333333333333334, 'developing heart disease'),
(7.333333333333334, 'although latex products'),
(7.2976190476190474, 'modern industrial agriculture'),
(7.25, 'threat means social'),
(7.166666666666666, 'self service approach'),
(7.166666666666666, 'long term engagement'),
(7.1269841269841265, 'taste perception animals'),
(7.1, 'way around ."'),
(7.097014925373134, 'said food items'),
(7.0476190476190474, 'world war two'),
(7.0, 'chicken noodle soup'),
(6.833333333333334, 'open pit barbecue'),
(6.75, 'major producers'),
(6.747014925373135, 'food include'),
(6.597014925373134, 'food retailing'),
(6.597014925373134, 'food preparation'),
(6.597014925373134, 'brings food'),
(6.583333333333334, 'organic farming methods'),
(6.554347826086957, 'cancer may'),
(6.533333333333333, 'developed nations today'),
(6.5, 'sweet generally regarded'),
(6.5, 'one example along'),
(6.5, 'molecule combining glucose'),
(6.5, 'market day'),
(6.5, 'government employer'),
(6.497014925373135, 'food habits'),
(6.430348258706468, 'food sources'),
(6.430348258706468, 'food safety'),
(6.430348258706468, 'food aid'),
(6.416666666666666, 'service restaurants'),
(6.4, 'many cultures'),
(6.3076923076923075, 'also includes'),
(6.2636815920398, 'food industry'),
(6.25, 'seeds within'),
(6.25, 'major influences'),
(6.218181818181819, 'dietary habits'),
(6.166666666666666, 'produced worldwide'),
(6.166666666666666, 'highly unpleasant'),
(6.161490683229814, 'ovens may'),
(6.115384615384615, 'animal preparation'),
(6.114285714285714, 'western world'),
(6.097014925373134, 'food prices'),
(6.097014925373134, 'food imports'),
(6.097014925373134, 'food deprivation'),
(6.083333333333334, 'see list'),
(6.066666666666666, 'many countries'),
(6.064102564102564, 'raw vegetables'),
(6.0, 'researchers said'),
(6.0, 'keeper could get'),
(6.0, 'et cetera'),
(6.0, 'commodities market'),
(6.0, 'archaeological evidence'),
(6.0, 'another example'),
(6.0, 'agricultural subsidy'),
(5.983333333333333, 'include methods'),
(5.948717948717949, 'animal sources'),
(5.897014925373134, 'health food'),
(5.890000000000001, 'major foods'),
(5.833333333333334, 'least since'),
(5.833333333333333, 'commercial sale'),
(5.8, 'human health'),
(5.79, 'many foods'),
(5.782051282051282, 'cooking worldwide'),
(5.75, 'many plants'),
(5.75, 'help stimulate'),
(5.666666666666667, 'disease since'),
(5.65, 'success include'),
(5.65, 'many suffering'),
(5.65, 'include pests'),
(5.63768115942029, 'salt may'),
(5.631578947368421, 'haccp ),'),
(5.631578947368421, 'asparagus ),'),
(5.6, 'international efforts'),
(5.6, 'international covenant'),
(5.6, 'international association'),
(5.597014925373134, 'food preferences'),
(5.597014925373134, 'food poisoning'),
(5.597014925373134, 'food derived'),
(5.583333333333334, 'use either'),
(5.554347826086957, 'may increase'),
(5.55, 'based shops'),
(5.542857142857143, 'western culture'),
(5.541666666666666, 'different countries'),
(5.5, 'various types'),
(5.5, 'trained waitstaff'),
(5.5, 'partly fueled'),
(5.5, 'nitrosamines generated'),
(5.5, 'mexican style'),
(5.5, 'managed agroecosystems'),
(5.5, 'common saying'),
(5.5, 'certain types'),
(5.4399999999999995, 'based foods'),
(5.430769230769231, 'raw meat'),
(5.430348258706468, 'provide food'),
(5.430348258706468, 'local food'),
(5.4, 'usually covered'),
(5.4, 'heat source'),
(5.4, 'agencies like'),
(5.371428571428572, 'blood sugar'),
(5.333333333333334, 'vegetable processing'),
(5.333333333333334, 'sophisticated processing'),
(5.333333333333334, 'room temperature'),
(5.333333333333334, 'mass slaughter'),
(5.333333333333334, 'high temperature'),
(5.3076923076923075, 'also occur'),
(5.3076923076923075, 'also exists'),
(5.304347826086957, 'others may'),
(5.3, 'domestic production'),
(5.298245614035088, 'law ),'),
(5.282051282051282, 'pressure cooking'),
(5.25, 'wood chips'),
(5.25, 'wheat futures'),
(5.25, 'developed countries'),
(5.25, 'countries list'),
(5.25, 'cereal grain'),
(5.25, '2015 ).'),
(5.230769230769231, 'raw state'),
(5.230769230769231, 'raw fruits'),
(5.214285714285714, 'plant species'),
(5.166666666666666, 'foreign bodies'),
(5.151515151515152, 'dietary analysis'),
(5.15, 'many cuisines'),
(5.142857142857142, 'ethnic culture'),
(5.133333333333333, 'mass production'),
(5.133333333333333, 'local consumers'),
(5.125, 'ordinary people'),
(5.125, 'different degree'),
(5.111111111111111, 'animals animals'),
(5.097014925373134, 'staple food'),
(5.097014925373134, 'saving food'),
(5.097014925373134, 'prepare food'),
(5.097014925373134, 'forgo food'),
(5.097014925373134, 'food taboos'),
(5.097014925373134, 'food shortages'),
(5.097014925373134, 'food security'),
(5.097014925373134, 'food scarcity'),
(5.097014925373134, 'food protection'),
(5.097014925373134, 'food preservatives'),
(5.097014925373134, 'food politics'),
(5.097014925373134, 'food naturally'),
(5.097014925373134, 'food longer'),
(5.097014925373134, 'food katz'),
(5.097014925373134, 'food goes'),
(5.097014925373134, 'food exports'),
(5.097014925373134, 'food dehydration'),
(5.097014925373134, 'food could'),
(5.097014925373134, 'food artist'),
(5.097014925373134, 'food allergy'),
(5.097014925373134, 'expensive food'),
(5.097014925373134, 'enjoy food'),
(5.097014925373134, 'eat food'),
(5.097014925373134, 'distribute food'),
(5.083333333333334, 'local shops'),
(5.083333333333334, 'demand issues'),
(5.071428571428571, 'sugar molecule'),
(5.071428571428571, 'indian cuisine'),
(5.068181818181818, 'dietary problems'),
(5.015384615384615, 'animal source'),
(5.0, 'various ways'),
(5.0, 'various documentaries'),
(5.0, 'tier market'),
(5.0, 'tandoor oven'),
(5.0, 'smaller cuts'),
(5.0, 'slightly acidic'),
(5.0, 'replace imports'),
(5.0, 'packaged broths'),
(5.0, 'olive oil'),
(5.0, 'microwave oven'),
(5.0, 'meat production'),
(5.0, 'market demands'),
(5.0, 'maintain life'),
(5.0, 'increased prices'),
(5.0, 'including rioting'),
(5.0, 'including juicing'),
(5.0, 'human beings'),
(5.0, 'higher costs'),
(5.0, 'grill held'),
(5.0, 'farmers took'),
(5.0, 'exempts slaughter'),
(5.0, 'excessive reserves'),
(5.0, 'evolutionary diets'),
(5.0, 'early version'),
(5.0, 'climate change'),
(5.0, 'children alike'),
(5.0, 'certain way'),
(5.0, 'certain sellers'),
(5.0, 'butcher paper'),
(5.0, 'american south'),
(5.0, 'agriculture organization'),
(5.0, '3 billion'),
(4.980769230769231, 'eaten raw'),
(4.971014492753623, 'may affect'),
(4.948717948717949, 'cooking methods'),
(4.948717948717949, 'animal products'),
(4.948717948717949, 'animal fats'),
(4.916666666666666, 'neediest countries'),
(4.916666666666666, 'donor countries'),
(4.865384615384615, 'cooking requires'),
(4.857142857142858, 'toaster ovens'),
(4.857142857142858, 'ovens mentioned'),
(4.857142857142858, 'healthier diet'),
(4.857142857142858, 'conventional ovens'),
(4.847014925373134, 'food contaminated'),
(4.847014925373134, 'food consumed'),
(4.833333333333334, 'water supply'),
(4.833333333333334, 'united states'),
(4.833333333333334, 'stimulate growth'),
(4.833333333333334, 'smoked fish'),
(4.833333333333334, 'reduce costs'),
(4.833333333333334, 'population growth'),
(4.833333333333334, 'open pit'),
(4.833333333333334, 'local self'),
(4.833333333333334, 'initial growth'),
(4.833333333333334, 'inflorescence vegetables'),
(4.833333333333334, 'fish monger'),
(4.833333333333334, 'demand ."'),
(4.818181818181818, 'dietary protein'),
(4.818181818181818, 'dietary choices'),
(4.8076923076923075, 'also come'),
(4.804347826086957, 'seafood may'),
(4.8, 'public health'),
(4.8, 'optimal health'),
(4.8, 'erosion control'),
(4.8, 'consumers still'),
(4.8, 'blood sausage'),
(4.8, 'based glaze'),
(4.785714285714286, 'least known'),
(4.771428571428572, 'asian cuisine'),
(4.75, 'typically high'),
(4.75, 'typically enjoyed'),
(4.75, 'recent years'),
(4.75, 'popular types'),
(4.75, 'generally requires'),
(4.75, 'coffee shops'),
(4.75, '21st century'),
(4.75, '20th century'),
(4.75, '19th century'),
(4.733333333333333, 'provide energy'),
(4.733333333333333, 'financial markets'),
(4.727272727272727, 'often referring'),
(4.727272727272727, 'often pressed'),
(4.727272727272727, 'often coordinated'),
(4.727272727272727, 'often connected'),
(4.714285714285714, 'world called'),
(4.714285714285714, 'third world'),
(4.714285714285714, 'plant matter'),
(4.714285714285714, 'modern era'),
(4.7, 'united nations'),
(4.678571428571429, 'large seeds'),
(4.666666666666667, 'various ingredients'),
(4.666666666666667, 'including humans'),
(4.666666666666667, 'disease states'),
(4.666666666666666, 'strong influence'),
(4.666666666666666, 'religious considerations'),
(4.666666666666666, 'pressure cooker'),
(4.666666666666666, 'preserving industry'),
(4.666666666666666, 'longer required'),
(4.666666666666666, 'highly related'),
(4.666666666666666, 'higher chances'),
(4.640000000000001, 'vegetable foods'),
(4.640000000000001, 'uncooked foods'),
(4.640000000000001, 'halal foods'),
(4.640000000000001, 'fortified foods'),
(4.640000000000001, 'foods high'),
(4.640000000000001, 'fattening foods'),
(4.640000000000001, 'consume foods'),
(4.640000000000001, 'canned foods'),
(4.625, 'encourage people'),
(4.625, 'different cuisines'),
(4.625, 'different characteristics'),
(4.619047619047619, 'global fruit'),
(4.615384615384615, 'whole animal'),
(4.615384615384615, 'cooking technique'),
(4.615384615384615, 'animal welfare'),
(4.607142857142858, 'diet cultural'),
(4.6, 'specific set'),
(4.6, 'specific group'),
(4.6, 'specific components'),
(4.6, 'new kind'),
(4.6, 'new delhi'),
(4.597014925373134, 'recently food'),
(4.597014925373134, 'food rationing'),
(4.597014925373134, 'food presented'),
(4.597014925373134, 'food occurs'),
(4.597014925373134, 'food allergies'),
(4.583333333333334, 'industrial farming'),
(4.571428571428571, 'taste buds'),
(4.571428571428571, 'sweet taste'),
(4.571428571428571, 'recognizable cuisine'),
(4.571428571428571, 'pungent taste'),
(4.555555555555555, 'live animals'),
(4.5, 'various vitamins'),
(4.5, 'urban sales'),
(4.5, 'university presses'),
(4.5, 'smooth topping'),
(4.5, 'six companies'),
(4.5, 'serious problem'),
(4.5, 'reduced nectar'),
(4.5, 'purely interested'),
(4.5, 'purchasers told'),
(4.5, 'promotes obesity'),
(4.5, 'panel discussion'),
(4.5, 'muscle systems'),
(4.5, 'metal grid'),
(4.5, 'manufactured outside'),
(4.5, 'making one'),
(4.5, 'lower cost'),
(4.5, 'long chains'),
(4.5, 'least 2018'),
(4.5, 'last aspect'),
(4.5, 'largest direct'),
(4.5, 'keep prices'),
(4.5, 'kashrut require'),
(4.5, 'increasing concern'),
(4.5, 'improper storage'),
(4.5, 'high potential'),
(4.5, 'grill operates'),
(4.5, 'ghosh points'),
(4.5, 'farm subsidies'),
(4.5, 'economically dependent'),
(4.5, 'easily available'),
(4.5, 'commonly called'),
(4.5, 'certain tastes'),
(4.5, 'blind due'),
(4.477272727272727, 'often eaten'),
(4.466666666666667, 'rice production'),
(4.466666666666667, 'contain ingredients'),
(4.464285714285714, 'modern times'),
(4.428571428571429, 'large volumes'),
(4.428571428571429, 'large proportion'),
(4.428571428571429, 'large corporations'),
(4.416666666666666, 'kept cold'),
(4.4, 'hinduism beef'),
(4.4, 'good source'),
(4.4, 'beef steak'),
(4.4, 'asian nations'),
(4.393939393939394, 'often affect'),
(4.392857142857142, 'popular culture'),
(4.390000000000001, 'cold foods'),
(4.333333333333334, 'unsaturated fats'),
(4.333333333333334, 'two methods'),
(4.333333333333334, 'sour sourness'),
(4.333333333333334, 'soaring demand'),
(4.333333333333334, 'small local'),
(4.333333333333334, 'several organisations'),
(4.333333333333334, 'several categories'),
(4.333333333333334, 'open fire'),
(4.333333333333334, 'nutrition attempts'),
(4.333333333333334, 'negatively impacts'),
(4.333333333333334, 'mined salt'),
(4.333333333333334, 'medicinal products'),
(4.333333333333334, 'local level'),
(4.333333333333334, 'iodine deficiency'),
(4.333333333333334, 'industrial revolution'),
(4.333333333333334, 'hunger ".'),
(4.333333333333334, 'hazard analysis'),
(4.333333333333334, 'grey salt'),
(4.333333333333334, 'developed massively'),
(4.333333333333334, 'consumer demand'),
(4.333333333333334, 'ambient bacteria'),
(4.333333333333333, 'thus considered'),
(4.333333333333333, 'rice –'),
(4.333333333333333, 'religious law'),
(4.333333333333333, 'kept hot'),
(4.333333333333333, 'investment institutions'),
(4.318181818181818, 'dietary fiber'),
(4.306666666666667, 'kosher foods'),
(4.306666666666667, 'hot foods'),
(4.285714285714286, 'strategy known'),
(4.285714285714286, 'known patients'),
(4.285714285714286, 'global supply'),
(4.285714285714286, 'global fluid'),
(4.285714285714286, 'global economy'),
(4.285714285714286, 'global cocoa'),
(4.285714285714286, 'global basis'),
(4.285714285714286, 'compound known'),
(4.282051282051282, 'cooking equipment'),
(4.266666666666667, 'specific time'),
(4.25, 'typically rare'),
(4.25, 'social constructs'),
(4.25, 'population either'),
(4.25, 'popular sweetener'),
(4.25, 'nutritional properties'),
(4.25, 'msg ).'),
(4.25, 'evolutionarily significant'),
(4.25, 'dramatic increase'),
(4.25, 'create problems'),
(4.25, 'commonly eaten'),
(4.25, 'cauliflower ).'),
(4.25, 'behavioral problems'),
(4.25, '2003 ).'),
(4.214285714285714, 'world study'),
(4.214285714285714, 'plant parts'),
(4.2, 'price shocks'),
(4.2, 'price controls'),
(4.2, 'meat preservative'),
(4.2, 'consume meat'),
(4.2, 'charred meat'),
(4.166666666666667, 'less commonly'),
(4.166666666666666, 'long term'),
(4.166666666666666, 'especially mushrooms'),
(4.142857142857142, 'culture makes'),
(4.140000000000001, 'enjoyable foods'),
(4.140000000000001, 'eating foods'),
(4.115384615384615, 'animal origin'),
(4.1, 'around 2'),
(4.071428571428571, 'simple sugar'),
(4.071428571428571, 'pleasant taste'),
(4.055555555555555, 'even animals'),
(4.0, '£ 79bn'),
(4.0, 'wto refer'),
(4.0, 'wto enforcement'),
(4.0, 'word foodstuff'),
(4.0, 'wider range'),
(4.0, 'wide range'),
(4.0, 'whole sheep'),
(4.0, 'vulnerable groups'),
(4.0, 'vermin controls'),
(4.0, 'vending machines'),
(4.0, 'vegetarians choose'),
(4.0, 'vegetable processors'),
(4.0, 'vast warehouse'),
(4.0, 'vast range'),
(4.0, 'varying degrees'),
(4.0, 'uruguay round'),
(4.0, 'tier structure'),
(4.0, 'threatened withdrawal'),
(4.0, 'systematic approaches'),
(4.0, 'substance intended'),
(4.0, 'strongly related'),
(4.0, 'strict interpretations'),
(4.0, 'steak tartare'),
(4.0, 'spoil easily'),
(4.0, 'song dynasty'),
(4.0, 'something crunchy'),
(4.0, 'soil fertility'),
(4.0, 'small number'),
(4.0, 'simplest level'),
(4.0, 'scribner nestle'),
(4.0, 'sauté pan'),
(4.0, 'sauce pot'),
(4.0, 'ripened ovaries'),
(4.0, 'richard peto'),
(4.0, 'richard doll'),
(4.0, 'rhetorical function'),
(4.0, 'research proposed'),
(4.0, 'recreational places'),
(4.0, 'recommended measures'),
(4.0, 'really interested'),
(4.0, 'real terms'),
(4.0, 'real match'),
(4.0, 'reaching effects'),
(4.0, 'quite small'),
(4.0, 'psychotropic substances'),
(4.0, 'protection policies'),
(4.0, 'print labels'),
(4.0, 'previously discussed'),
(4.0, 'polyaromatic hydrocarbons'),
(4.0, 'political tool'),
(4.0, 'plate ".'),
(4.0, 'pension funds'),
(4.0, 'others choose'),
(4.0, 'ordered procedure'),
(4.0, 'opposite flavors'),
(4.0, 'obtained directly'),
(4.0, 'nutrients necessary'),
(4.0, 'newsouth publishing'),
(4.0, 'necessary nutrient'),
(4.0, 'morgan stanley'),
(4.0, 'moderate proportions'),
(4.0, 'microbiological studies'),
(4.0, 'merely aggravated'),
(4.0, 'medical emergency'),
(4.0, 'meaning iodine'),
(4.0, 'march 24'),
(4.0, 'manufacture cans'),
(4.0, 'mammary glands'),
(4.0, 'maintained rather'),
(4.0, 'louis pasteur'),
(4.0, 'lost ".'),
(4.0, 'longer run'),
(4.0, 'liquid propane'),
(4.0, 'legal definition'),
(4.0, 'kosher salt'),
(4.0, 'jugged hare'),
(4.0, 'jayati ghosh'),
(4.0, 'japanese word'),
(4.0, 'isbn 9781920705541'),
(4.0, 'intermediate corporations'),
(4.0, 'inorganic substances'),
(4.0, 'initial treatment'),
(4.0, 'individual cook'),
(4.0, 'increasing consumption'),
(4.0, 'increased irrespective'),
(4.0, 'improve peoples'),
(4.0, 'import quotas'),
(4.0, 'humanitarian implications'),
(4.0, 'herbal supplements'),
(4.0, 'hedge funds'),
(4.0, 'habitual decisions'),
(4.0, 'groundwater recharge'),
(4.0, 'grilled ")'),
(4.0, 'green algae'),
(4.0, 'greater understanding'),
(4.0, 'goldman sachs'),
(4.0, 'globe artichokes'),
(4.0, 'fundamental right'),
(4.0, 'fundamental drivers'),
(4.0, 'fully aware'),
(4.0, 'fuel types'),
(4.0, 'fruits .)'),
(4.0, 'flood mitigation'),
(4.0, 'flavors unique'),
(4.0, 'fishing vessel'),
(4.0, 'financial institutions'),
(4.0, 'fatal version'),
(4.0, 'farm gate'),
(4.0, 'eyes ".'),
(4.0, 'expanded edition'),
(4.0, 'evolutionary purposes'),
(4.0, 'evolutionarily speaking'),
(4.0, 'european union'),
(4.0, 'etc .).'),
(4.0, 'etc .)'),
(4.0, 'entire communities'),
(4.0, 'enjoy sweet'),
(4.0, 'egg whites'),
(4.0, 'economically survive'),
(4.0, 'easier revenues'),
(4.0, 'dramatic changes'),
(4.0, 'distant locations'),
(4.0, 'disputed grounds'),
(4.0, 'dictionary definition'),
(4.0, 'dhabīḥah halal'),
(4.0, 'december 2010'),
(4.0, 'culinary art'),
(4.0, 'consumer spending'),
(4.0, 'cityfarm project'),
(4.0, 'cigarette butts'),
(4.0, 'choking hazard'),
(4.0, 'chewing gum'),
(4.0, 'chemically changes'),
(4.0, 'chemically alter'),
(4.0, 'charcoal along'),
(4.0, 'ceramic pot'),
(4.0, 'cases account'),
(4.0, 'carotid artery'),
(4.0, 'called barbacoa'),
(4.0, 'bubbles exploding'),
(4.0, 'botanical fruits'),
(4.0, 'borne illnesses'),
(4.0, 'bilaterally resolved'),
(4.0, 'benjamin palmer'),
(4.0, 'begun calling'),
(4.0, 'barbarian group'),
(4.0, 'bamboo shoots'),
(4.0, 'avoiding sugars'),
(4.0, 'avoid contaminating'),
(4.0, 'artificial sweeteners'),
(4.0, 'arab spring'),
(4.0, 'appetizing way'),
(4.0, 'annual conference'),
(4.0, 'andras forgacs'),
(4.0, 'anaphylactic shock'),
(4.0, 'ambient conditions'),
(4.0, 'amber color'),
(4.0, 'alcoholic drinks'),
(4.0, 'alcoholic beverages'),
(4.0, 'agricultural'),
(4.0, 'actual supply'),
(4.0, '496 billion'),
(4.0, '450 economists'),
(3.916666666666667, 'significant role'),
(3.916666666666667, 'significant part'),
(3.916666666666667, 'either starvation'),
(3.916666666666667, '2012 ).'),
(3.916666666666667, '2007 ).'),
(3.8333333333333335, 'tobacco products'),
(3.8333333333333335, 'possible impacts'),
(3.8333333333333335, 'packaging methods'),
(3.8333333333333335, 'omega fats'),
(3.8333333333333335, 'moist methods'),
(3.8333333333333335, 'historically salt'),
(3.8333333333333335, 'dairy products'),
(3.833333333333333, 'short term'),
(3.75, 'substance consumed'),
(3.75, 'nutritional economics'),
(3.75, 'nitrites used'),
(3.75, 'good flavor'),
(3.75, 'enhancing flavor'),
(3.75, 'enhance flavor'),
(3.75, 'cultural rights'),
(3.75, 'cultural identity'),
(3.75, 'become contaminated'),
(3.75, '300 times'),
(3.75, '2011 ).'),
(3.7, 'cured meat'),
(3.666666666666667, 'ultimately starvation'),
(3.666666666666667, 'town supermarkets'),
(3.666666666666667, 'thus changing'),
(3.666666666666667, 'specifically meats'),
(3.666666666666667, 'specifically humans'),
(3.666666666666667, 'short periods'),
(3.666666666666667, 'prepared time'),
(3.666666666666667, 'poor treatment'),
(3.666666666666667, 'poor intake'),
(3.666666666666667, 'partially processed'),
(3.666666666666667, 'pan frying'),
(3.666666666666667, 'march 2012'),
(3.666666666666667, 'less determined'),
(3.666666666666667, 'kettle corn'),
(3.666666666666667, 'items processed'),
(3.666666666666667, 'frying pan'),
(3.666666666666667, 'emerging technology'),
(3.666666666666667, 'december 2007'),
(3.666666666666667, 'crucial role'),
(3.666666666666667, '2008 estimate'),
(3.666666666666667, '2008 accounted'),
(3.6666666666666665, 'term restaurant'),
(3.6666666666666665, 'edible'),
(3.5, 'year highs'),
(3.5, 'wide array'),
(3.5, 'unsafe container'),
(3.5, 'trend'),
(3.5, 'specifically lemons'),
(3.5, 'soft drink'),
(3.5, 'similarly evident'),
(3.5, 'second type'),
...]

``````

### WORD2VEC + DOC2VEC

``````

In [ ]:

``````

### TEXT RANK?

``````

In [ ]:

``````