In [2]:
from yummly import Client
import json
import requests
import pandas as pd
import numpy as np 
import re

In [3]:
# Search API call for the first 500 recipes whose only course label is Soups
# (every other course is explicitly excluded below).
# NOTE(review): the app id/key are hardcoded credentials; consider loading them
# from the environment before sharing this notebook.
header= {'X-Yummly-App-ID':'79663a75', 'X-Yummly-App-Key':'02b233108f476f3110e0f65437c4d6dd'}
url='http://api.yummly.com/v1/api/recipes?'
parameters={
            'allowedCourse[]':'course^course-Soups',
            'excludedCourse[]': ['course^course-Main Dishes','course^course-Appetizers', 'course^course-Salads', 'course^course-Lunch',
                                'course^course-Side Dishes','course^course-Desserts','course^course-Breads',
                                 'course^course-Breakfast and Brunch', 'course^course-Beverages', 'course^course-Condiments and Sauces',
                                'course^course-Cocktails', 'course^course-Snacks'],
            'maxResult': 500,
            'start': 0
            }

# requests applies no timeout by default, so an unresponsive server would block
# this cell forever; fail after 30 seconds of silence instead.
response=requests.get(url, headers = header, params = parameters, timeout = 30)

In [4]:
# HTTP status code of the search request issued above (200 in the recorded output).
response.status_code


Out[4]:
200

In [6]:
# Decode the JSON body of the search response, then show its type and top-level keys.
SP = response.json()
print(type(SP))
print(SP.keys())


<type 'dict'>
[u'matches', u'totalMatchCount', u'attribution', u'facetCounts', u'criteria']

In [7]:
# Only the entries under 'matches' are of interest: show how many there are,
# the container type, and the keys of the first entry.
print(len(SP['matches']))
print(type(SP['matches']))
print(SP['matches'][0].keys())


500
<type 'list'>
[u'flavors', u'rating', u'totalTimeInSeconds', u'ingredients', u'smallImageUrls', u'sourceDisplayName', u'recipeName', u'attributes', u'id', u'imageUrlsBySize']

In [8]:
# Check out one recipe: display the last match returned.
# Indexing with -1 rather than the literal 499 shows the same record when a full
# page of 500 matches comes back, and does not raise IndexError on a shorter page.
SP_matches = SP['matches']
SP_matches[-1]


Out[8]:
{u'attributes': {u'course': [u'Soups'], u'cuisine': [u'Barbecue']},
 u'flavors': {u'bitter': 0.3333333333333333,
  u'meaty': 0.16666666666666666,
  u'piquant': 0.8333333333333334,
  u'salty': 0.3333333333333333,
  u'sour': 0.8333333333333334,
  u'sweet': 0.3333333333333333},
 u'id': u'Chilled-Avocado-Soup-with-Grilled-Chili-Spiced-Shrimp-1701737',
 u'imageUrlsBySize': {u'90': u'https://lh3.googleusercontent.com/-vd-5f0Q69BBwVjcpFbX_tBEY0ircRm_qeocSdAifgSn2yu4h8_G_xEERwBB63ZjGvH18Fj6_p7_kOvSYCxiCw=s90-c'},
 u'ingredients': [u'low-fat buttermilk',
  u'avocado',
  u'chicken stock',
  u'plain low fat greek yogurt',
  u'chopped cilantro',
  u'garlic cloves',
  u'jalapeno chilies',
  u'kosher salt',
  u'lime',
  u'mango',
  u'red bell pepper',
  u'purple onion',
  u'cilantro',
  u'large shrimp',
  u'chili powder',
  u'cayenne pepper'],
 u'rating': 4,
 u'recipeName': u'Chilled Avocado Soup with Grilled Chili Spiced Shrimp',
 u'smallImageUrls': [u'https://lh3.googleusercontent.com/T2AhaHA4CKaWyvNx2FGIY0DETCIYUNA2Uj89KXG4qlJ-isYre5P8m2Vbf7Y9WKRuX_qcaasgxhZRTI2HQL03ag=s90'],
 u'sourceDisplayName': u'Suburban Soapbox',
 u'totalTimeInSeconds': 960}

In [11]:
# Build the per-recipe records that later become dataframes:
#   main_list        - id, rating, name and source of each recipe
#   ingredients_list - id, course, raw ingredient list and one indicator per normalized ingredient
#   attributes_list  - id and one indicator per attribute value
#   flavors_dict     - recipe id -> flavors mapping as returned by the API

# Modifier patterns removed from ingredient names, applied one after another in
# this order. Order matters: 'boneless skinless' has to be tried before the
# shorter 'boneless', otherwise the longer pattern can never match and
# 'skinless ...' is left behind.
_MODIFIER_PATTERNS = [re.compile(pattern) for pattern in (
    r'\d+%\s',                  # percentages such as "1% ", "2% "
    r'\xae',                    # the '\xae' (registered-trademark) character
    r'shredded\s',
    r'chopped\s',
    r'diced\s',
    r'crumbled\s',
    r'fresh\s',
    r'grated\s',
    r'fat free\s',
    r'boneless skinless\s',
    r'boneless\s',
    r'minced\s',
    r'sliced\s',
    r'(?!ground beef)ground ',  # drop "ground " except in "ground beef"
    r'^dried\s',                # only when the name starts with "dried "
    r'^cooked\s',               # only when the name starts with "cooked "
)]


def _normalize_ingredient(name):
    """Lower-case an ingredient name and strip the preparation modifiers listed above."""
    name = name.lower()
    for pattern in _MODIFIER_PATTERNS:
        name = pattern.sub('', name)
    return name


main_list = []
ingredients_list = []
attributes_list = []

for food in SP_matches:
    main_list.append({
        'id': food['id'],
        'rating': food['rating'],
        'recipeName': food['recipeName'],
        'sourceDisplayName': food['sourceDisplayName'],
    })

    ingredient_row = {
        'id': food['id'],
        # The search request only allows the Soups course; this label was
        # previously 'Breakfast and Brunch', which did not match the query.
        'course': 'Soups',
        'ingredient_list': food['ingredients'],
    }
    for raw_name in food['ingredients']:
        ingredient_row[_normalize_ingredient(raw_name)] = 1
    ingredients_list.append(ingredient_row)

    # One indicator per attribute value, regardless of which attribute it belongs to.
    attribute_row = {'id': food['id']}
    for attribute_values in food['attributes'].values():
        for value in attribute_values:
            attribute_row[value] = 1
    attributes_list.append(attribute_row)

flavors_dict = {food.get('id'): food.get('flavors') for food in SP_matches}

In [13]:
# Load the cuisine names from CSV (single column, named 'cuisine' here) and keep
# that column as the list of possible cuisine values.
# NOTE(review): this is an absolute path on one machine; the cell fails elsewhere.
cuisine_df = pd.read_csv(
    '/Users/bruktawitabebe/Desktop/Yummly/cuisine_headers.csv',
    names=['cuisine'],
)
cuisine_list = cuisine_df['cuisine']

In [16]:
# Map each recipe id to its cuisines, then expand that into one 0/1 indicator
# per known cuisine for every recipe.

# recipe id -> list of cuisines, or None when the recipe has no 'cuisine' attribute
cuisine_dict = {food.get('id'): food['attributes'].get('cuisine') for food in SP_matches}

# Materialize the names once instead of re-iterating cuisine_list per recipe.
cuisine_names = list(cuisine_list)

_cuisines = {}
for recipe_id, recipe_cuisines in cuisine_dict.items():
    # A recipe without cuisines gets 0 for every indicator.
    present = recipe_cuisines or []
    # Assigned once per recipe, so every recipe gets an entry even when the
    # list of known cuisines is empty.
    _cuisines[recipe_id] = {name: int(name in present) for name in cuisine_names}

In [18]:
# Collect the id of every matched recipe, in match order.
recipe_ids = [match['id'] for match in SP_matches]

In [19]:
# Settings for the second API call, which fetches further features per recipe.
# Base URL of the single-recipe endpoint; the recipe id is appended to it.
url = 'http://api.yummly.com/v1/api/recipe/'
# Credentials passed as the query string of each request.
# NOTE(review): hardcoded credentials; consider reading them from the environment.
key_id = '_app_id=79663a75&_app_key=02b233108f476f3110e0f65437c4d6dd'

In [20]:
# retrieve other features for all recipes

def get_recipe(_id):
    """Fetch the details of one recipe and return the decoded JSON body.

    The request URL is ``url + _id + '?' + key_id``, using the module-level
    ``url`` and ``key_id``.

    Raises ``requests.exceptions.HTTPError`` on a 4xx/5xx answer, so an error
    payload is never returned as if it were a recipe, and
    ``requests.exceptions.Timeout`` if the server stays silent for 30 seconds
    (requests applies no timeout by default).
    """
    response = requests.get(url + _id + '?' + key_id, timeout=30)
    response.raise_for_status()
    return response.json()

# Fetch the details of every recipe, one request per id, keeping the id order.
recipes = []
for recipe_id in recipe_ids:
    recipe_json = get_recipe(recipe_id)
    recipes.append(recipe_json)

In [21]:
# NOTE(review): `response` here is the module-level object from the initial search
# request. The `response` assigned inside get_recipe is local to that function, so
# this value does not reflect the outcome of the per-recipe requests.
response.status_code


Out[21]:
200

In [22]:
# Number of recipes fetched, and the keys available on the second one.
print(len(recipes))
print(recipes[1].keys())


500
[u'totalTime', u'ingredientLines', u'attribution', u'name', u'rating', u'numberOfServings', u'yield', u'nutritionEstimates', u'source', u'flavors', u'images', u'attributes', u'id', u'totalTimeInSeconds']

In [23]:
# Reduce every fetched recipe to a small dictionary of selected attributes.
# 'id', 'ingredientLines' and 'numberOfServings' are indexed directly (KeyError
# if absent); the three time fields use .get and become None when missing.
recipe_details = [
    {
        'id': recipe['id'],
        'ingredientCount': len(recipe['ingredientLines']),
        'numberOfServings': recipe['numberOfServings'],
        'prepTimeInSeconds': recipe.get('prepTimeInSeconds'),
        'cookTimeInSeconds': recipe.get('cookTimeInSeconds'),
        'totalTimeInSeconds': recipe.get('totalTimeInSeconds'),
    }
    for recipe in recipes
]

In [25]:
# Create the dataframes, move the identifying columns to the front and save
# each one as a CSV file in the working directory.

def _with_leading_columns(frame, leading):
    """Return ``frame`` with the ``leading`` columns first and the rest in their existing order.

    Uses label-based ``.loc`` selection; ``DataFrame.ix`` is deprecated and was
    removed in pandas 1.0. Raises ``KeyError`` if a leading column is missing.
    """
    trailing = [col for col in frame.columns if col not in leading]
    return frame.loc[:, list(leading) + trailing]


df_main = pd.DataFrame(main_list)
df_main.to_csv('SP_main.csv', encoding='utf-8')

# Missing indicators mean "ingredient not used", hence the 0 fill.
df_ingredients = _with_leading_columns(
    pd.DataFrame(ingredients_list).fillna(0), ['id', 'course'])
df_ingredients.to_csv('SP_ingredients.csv', encoding='utf-8')

df_attributes = _with_leading_columns(
    pd.DataFrame(attributes_list).fillna(0), ['id'])
df_attributes.to_csv('SP_attributes.csv', encoding='utf-8')

# The dicts are keyed by recipe id, so transpose to get one row per recipe and
# turn the id index into a regular column.
df_flavors = pd.DataFrame(flavors_dict).transpose().reset_index()
df_flavors.to_csv('SP_flavors.csv', encoding='utf-8')

df_cuisines = pd.DataFrame(_cuisines).transpose().reset_index()
df_cuisines.to_csv('SP_cuisines.csv', encoding='utf-8')

df_details = _with_leading_columns(pd.DataFrame(recipe_details), ['id'])
df_details.to_csv('SP_details.csv', encoding='utf-8')