notebook.community

Edit and run



In [1]:

    
from yummly import Client
import json
import requests
import pandas as pd
import numpy as np 
import re



In [31]:

    
# API call for the first 500 BB recipes labeled as such only!
header= {'X-Yummly-App-ID':'79663a75', 'X-Yummly-App-Key':'02b233108f476f3110e0f65437c4d6dd'}
url='http://api.yummly.com/v1/api/recipes?'
parameters={
            'allowedCourse[]':'course^course-Breakfast and Brunch',
            'excludedCourse[]': ['course^course-Main Dishes','course^course-Appetizers', 'course^course-Salads', 'course^course-Lunch',
                                'course^course-Side Dishes','course^course-Desserts','course^course-Breads',
                                 'course^course-Soups', 'course^course-Beverages', 'course^course-Condiments and Sauces',
                                'course^course-Cocktails', 'course^course-Snacks'],
            'maxResult': 501,
            'start': 500
            }

response=requests.get(url, headers = header, params = parameters)



In [32]:

    
response.status_code









    Out[32]:





200



In [52]:

    
BB=response.json()

print type(BB)
print BB.keys()









    



<type 'dict'>
[u'matches', u'totalMatchCount', u'attribution', u'facetCounts', u'criteria']



In [53]:

    
#only interrested in the information under matches. 
print len(BB['matches'])
print type(BB['matches'])
print BB['matches'][0].keys()









    



500
<type 'list'>
[u'flavors', u'rating', u'totalTimeInSeconds', u'ingredients', u'smallImageUrls', u'sourceDisplayName', u'recipeName', u'attributes', u'id', u'imageUrlsBySize']



In [54]:

    
#checkout one recipe
BB_matches=BB['matches']
BB_matches[0]









    Out[54]:





{u'attributes': {u'course': [u'Breakfast and Brunch']},
 u'flavors': {u'bitter': 0.3333333333333333,
  u'meaty': 0.6666666666666666,
  u'piquant': 0.0,
  u'salty': 0.5,
  u'sour': 0.16666666666666666,
  u'sweet': 0.6666666666666666},
 u'id': u'Coconut-Flour-Pancakes-1638026',
 u'imageUrlsBySize': {u'90': u'https://lh3.googleusercontent.com/PN8m39E7AB1zanWBNrdecgePqMRO9NNG1g8zkRKeN5uvQ1KvA3QMuStyLpKCVTTuDd4kgHVE3Dqr0H8CNDX1=s90-c'},
 u'ingredients': [u'eggs',
  u'applesauce',
  u'coconut milk',
  u'coconut sugar',
  u'cinnamon',
  u'coconut flour',
  u'salt'],
 u'rating': 4,
 u'recipeName': u'Coconut Flour Pancakes',
 u'smallImageUrls': [u'https://lh3.googleusercontent.com/qv72rHAWgr2LXDrBIkJkZXeclh7ThiDNzLvOmhhV-XqPxIxWqKRRh73laOgZZhx8uJhZ_1mu-18VjnOgJAhdWw=s90'],
 u'sourceDisplayName': u'Healthy Lifestyle Today',
 u'totalTimeInSeconds': 1560}



In [55]:

    
#import previous list of recipes collected
df=pd.read_csv('BB_main.csv')
df1=pd.read_csv('BB_main_1.csv')
BB_ids=df.id
BB1_ids=df1.id
print BB_ids[0]
print BB1_ids[0]
BB2_ids=[]
for recipe in BB_matches:
    BB2_ids.append(recipe['id'])
print BB2_ids[0]
#check if there are dupplicate recipes
print [i for i, j in zip(BB_ids, BB2_ids) if i == j]
print [i for i, j in zip(BB1_ids, BB2_ids) if i == j]









    



Healthy-Chocolate-Porridge-1711204
Baked-French-Toast-Casserole-1636754
Coconut-Flour-Pancakes-1638026
[]
['French-Toast-with-Vegan-Nog-964692', 'Quick-and-Easy-Waffles-1537027']



In [57]:

    
#remove duplicate recipe from the recipe
BB_matches[:] = [d for d in BB_matches if d.get('id') != 'French-Toast-with-Vegan-Nog-964692']
BB_matches[:] = [d for d in BB_matches if d.get('id') != 'Quick-and-Easy-Waffles-1537027'] 
                 #'Quick-and-Easy-Waffles-1537027'

# check to see if recipes have been removed
BB2_ids = []
for recipe in BB_matches:
    BB2_ids.append(recipe['id'])
    
print [i for i, j in zip(BB1_ids, BB2_ids) if i == j]
len(BB_matches)









    



[]






    Out[57]:





498



In [69]:

    
#forming lists to create dataframes of the features we want. 
main_list = []
ingredients_list = []
attributes_list = []

for food in BB_matches:

    _d1 = {}
    _d1['id'] = food['id']
    _d1['rating'] = food['rating']
    _d1['recipeName'] = food['recipeName']
    _d1['sourceDisplayName'] = food['sourceDisplayName']
    
    
    main_list.append(_d1)
    
    _d2 = {}
    _d2['id'] =food['id']
    _d2['course']= 'Breakfast and Brunch'
    for i in food['ingredients']:
        i = i.lower() # additional code to conver to lowercase
        i = re.sub(r'\d+%\s', '', i) # additional code to remove 1%, 2%, etc
        i = re.sub(r'\xae', '', i) # remove '\xae' characters
        _d2[i] = 1
    ingredients_list.append(_d2)

    _d3 = {}
    _d3['id'] = food['id']
    for k, v in food['attributes'].items():
        for i in v:
            _d3[i] = 1
    attributes_list.append(_d3)
    
flavors_dict = {}

for food in BB_matches:
    flavors_dict[food.get('id')] = food.get('flavors')



In [59]:

    
# read in dictionary for course and cuisine and create list of possible values for each
cuisine_df = pd.read_csv('cuisine_headers.csv', names=['cuisine'])

cuisine_list= cuisine_df.cuisine



In [61]:

    
#create dictionary of cuisine and course for each recipe
cuisine_dict={}
for food in BB_matches:
    cuisine_dict[food.get('id')]= food['attributes'].get('cuisine')

        
_cuisines= {}       

for k, v in cuisine_dict.iteritems():
    cuisine_val = {}
    for course in cuisine_list:
        try:
            if course in v :
                cuisine_val[course] = 1
            else:
                cuisine_val[course] = 0
        except TypeError:
            cuisine_val[course] = 0
    
        _cuisines[k] = cuisine_val



In [64]:

    
# second api call to get other features for each recipe
key_id= '_app_id=79663a75&_app_key=02b233108f476f3110e0f65437c4d6dd'
url='http://api.yummly.com/v1/api/recipe/'



In [65]:

    
# retrieve other features for all recipes

def get_recipe(_id):
    response = requests.get(url + _id + '?' + key_id)
    return response.json()

recipes=[]
for _id in BB2_ids :
    recipes.append(get_recipe(_id))



In [62]:

    
response.status_code









    Out[62]:





200



In [66]:

    
print len(recipes)
print recipes[1].keys()









    



498
[u'totalTime', u'ingredientLines', u'attribution', u'name', u'prepTimeInSeconds', u'rating', u'cookTimeInSeconds', u'numberOfServings', u'yield', u'nutritionEstimates', u'source', u'flavors', u'images', u'attributes', u'cookTime', u'id', u'prepTime', u'totalTimeInSeconds']



In [67]:

    
#for each recipe create a new dictionary of selected attributes and append into a list

recipe_details=[]
for recipe in recipes:
    _dict={}
    #import pdb; pdb.set_trace()
    _dict['id']=recipe['id']
    _dict['ingredientCount']= len(recipe['ingredientLines'])
    _dict['numberOfServings']= recipe['numberOfServings']
    if 'prepTimeInSeconds' in recipe.keys():
        _dict['prepTimeInSeconds']= recipe['prepTimeInSeconds']
    else:
        _dict['prepTimeInSeconds']= None
    if 'cookTimeInSeconds' in recipe.keys():
        _dict['cookTimeInSeconds']= recipe['cookTimeInSeconds']
    else:
        _dict['cookTimeInSeconds']= None
    _dict['totalTimeInSeconds']=recipe['totalTimeInSeconds']
    
    
    recipe_details.append(_dict)



In [71]:

    
#create dataframes, arrange column index and save into csv
df_main = pd.DataFrame(main_list)
df_main.to_csv('BB_main_2.csv', encoding ='utf-8')

df_ingredients = pd.DataFrame(ingredients_list)
df_ingredients = df_ingredients.fillna(0)
cols = list(df_ingredients)
cols.insert(0, cols.pop(cols.index('id')))
cols.insert(1, cols.pop(cols.index('course')))
df_ingredients= df_ingredients.ix[:,cols]
df_ingredients.to_csv('BB_ingredients_2.csv', encoding ='utf-8')

df_attributes = pd.DataFrame(attributes_list)
df_attributes = df_attributes.fillna(0)
cols = list(df_attributes)
cols.insert(0, cols.pop(cols.index('id')))
df_attributes = df_attributes.ix[:,cols]
df_attributes.to_csv('BB_attributes_2.csv')

df_flavors = pd.DataFrame(flavors_dict).transpose()
df_flavors.reset_index(level=0, inplace=True)
df_flavors.to_csv('BB_flavors_2.csv')

df_cuisines = pd.DataFrame(_cuisines).transpose()
df_cuisines.reset_index(level=0, inplace=True)
df_cuisines.to_csv('BB_cuisines_2.csv')

df_details=pd.DataFrame(recipe_details)
cols = list(df_details)
cols.insert(0, cols.pop(cols.index('id')))
df_details=df_details.ix[:,cols]
df_details.to_csv('BB_details_2.csv')



In [27]:

    
for i in df_ingredients.columns:
    if 'egg' in i:
        print i









    



beaten eggs
cholesterol free egg substitute
egg whites
egg yolks
eggnog
eggs
extra large eggs
flax egg
jumbo eggs
large egg whites
large egg yolks
large eggs
veggies



In [29]:

    
df_ingredients.sum(axis=0).sort_values(ascending=False)









    Out[29]:





id                             Blueberry-Breakfast-Bake-1699614Broccoli_-ham_...
eggs                                                                         254
salt                                                                         223
milk                                                                         146
baking powder                                                                110
cinnamon                                                                     100
butter                                                                        83
vanilla extract                                                               79
sugar                                                                         75
large eggs                                                                    74
pepper                                                                        65
water                                                                         53
flour                                                                         52
maple syrup                                                                   51
bananas                                                                       49
vanilla                                                                       48
honey                                                                         42
all-purpose flour                                                             38
baking soda                                                                   35
olive oil                                                                     34
ground cinnamon                                                               33
rolled oats                                                                   31
chia seeds                                                                    29
coconut oil                                                                   27
brown sugar                                                                   27
sea salt                                                                      27
unsalted butter                                                               25
onions                                                                        25
strawberries                                                                  25
buttermilk                                                                    24
                                                     ...                        
low-fat granola                                                                1
iodized salt                                                                   1
instant yeast                                                                  1
hot sausage                                                                    1
green bell pepper                                                              1
sweet pepper                                                                   1
granny smith apples                                                            1
vegan English muffins                                                          1
grape jelly                                                                    1
grass-fed butter                                                               1
grated Monterey Jack cheese                                                    1
grated nutmeg                                                                  1
grated orange                                                                  1
gravy mix                                                                      1
vanilla yogurt                                                                 1
green beans                                                                    1
green chile                                                                    1
hot dog bun                                                                    1
greens                                                                         1
grits                                                                          1
ground allspice                                                                1
ground almonds                                                                 1
vanilla powder                                                                 1
vanilla flavoring                                                              1
ground hazelnuts                                                               1
ground white pepper                                                            1
gruyere cheese                                                                 1
heavy whipping cream                                                           1
hollandaise sauce                                                              1
goat cheese                                                                    1
dtype: object



In [ ]: