In [2]:

    
import pandas as pd
import numpy as np

Appetizers

Main



In [3]:

    
# Load the four "main" recipe tables from CSV files given by relative path.
df, df1, df2, df3 = map(pd.read_csv, [
    'APP_main.csv',
    'APP_main_1.csv',
    'APP_main_2.csv',
    'APP_main_3.csv',
])



In [4]:

    
# Print the (rows, columns) shape of one raw "main" table, then preview another.
# NOTE(review): the shape comes from df1 but the preview from df3 -- confirm
# that this mix is intended.
# print() with a single argument produces the same output on Python 2 and 3.
print(df1.shape)
df3.head()









    



(500, 5)






    Out[4]:






  
    
      
      Unnamed: 0
      id
      rating
      recipeName
      sourceDisplayName
    
  
  
    
      0
      0
      Pizza-dip-333915
      5
      Pizza Dip
      How Sweet It Is
    
    
      1
      1
      The-Greatest-Hummus-Recipe-Ever_-1663684
      4
      The Greatest Hummus Recipe Ever!
      Gluten Free Yummy
    
    
      2
      2
      Garlic-Herb-Cream-Cheese-Dip-972926
      4
      Garlic Herb Cream Cheese Dip
      Cooking On A Budget
    
    
      3
      3
      Frank_s-RedHot-Buffalo-Chicken-Dip-1089684
      3
      Frank's RedHot Buffalo Chicken Dip
      Frank's Red Hot
    
    
      4
      4
      Marinated-Japanese-Mushrooms-1102960
      4
      Marinated Japanese Mushrooms
      No Recipes



In [5]:

    
# Remove the 'Unnamed: 0' column from the first "main" table.
# `axis` is passed by keyword because newer pandas rejects it as a positional argument.
df = df.drop('Unnamed: 0', axis=1)



In [6]:

    
# Print the shape as loaded, drop the 'Unnamed: 0' column, then preview 3 rows.
# `axis` is passed by keyword (newer pandas rejects the positional form);
# print() with one argument behaves the same on Python 2 and 3.
print(df1.shape)
df1 = df1.drop('Unnamed: 0', axis=1)
df1.head(3)









    



(500, 5)






    Out[6]:






  
    
      
      id
      rating
      recipeName
      sourceDisplayName
    
  
  
    
      0
      Skinny-Hummus-With-Zero-Tahini-1627413
      4
      Skinny Hummus With Zero Tahini
      The Kitchen Girl
    
    
      1
      Pico-De-Gallo-1682898
      4
      Pico De Gallo
      The Girl Who Ate Everything
    
    
      2
      Fiesta-Baked-Cheese-Dip-1696540
      3
      Fiesta Baked Cheese Dip
      Kraft



In [7]:

    
# Print the shape as loaded, drop the 'Unnamed: 0' column, then preview 3 rows.
# Keyword `axis` and print() keep the cell working on newer pandas / Python 3.
print(df2.shape)
df2 = df2.drop('Unnamed: 0', axis=1)
df2.head(3)









    



(500, 5)






    Out[7]:






  
    
      
      id
      rating
      recipeName
      sourceDisplayName
    
  
  
    
      0
      Warm-Spinach-and-Artichoke-Dip-1222123
      4
      Warm Spinach and Artichoke Dip
      Deliciously Ella
    
    
      1
      Honey-Garlic-Crockpot-Meatballs-1245745
      4
      Honey Garlic Crockpot Meatballs
      Family Fresh Meals
    
    
      2
      Cheesy-Spinach-_-Artichoke-Dip-1063719
      4
      Cheesy Spinach & Artichoke Dip
      Shibley Smiles



In [8]:

    
# Print the shape as loaded, drop the 'Unnamed: 0' column, then preview 3 rows.
# Keyword `axis` and print() keep the cell working on newer pandas / Python 3.
print(df3.shape)
df3 = df3.drop('Unnamed: 0', axis=1)
df3.head(3)









    



(500, 5)






    Out[8]:






  
    
      
      id
      rating
      recipeName
      sourceDisplayName
    
  
  
    
      0
      Pizza-dip-333915
      5
      Pizza Dip
      How Sweet It Is
    
    
      1
      The-Greatest-Hummus-Recipe-Ever_-1663684
      4
      The Greatest Hummus Recipe Ever!
      Gluten Free Yummy
    
    
      2
      Garlic-Herb-Cream-Cheese-Dip-972926
      4
      Garlic Herb Cream Cheese Dip
      Cooking On A Budget



In [12]:

    
# Stack the four "main" tables row-wise into a single frame.
main_tables = [df, df1, df2, df3]
APP_main = pd.concat(main_tables)
# Reduced variant: same rows, without the two descriptive text columns.
dropped_text_columns = ['recipeName', 'sourceDisplayName']
APP_main_reduced = APP_main.drop(dropped_text_columns, axis=1)



In [15]:

    
#peek at dataframe
# print() with one argument behaves the same on Python 2 and 3.
print(APP_main.shape)
# Count of rows whose id repeats the id of an earlier row.
APP_main.id.duplicated().sum()









    



(2000, 4)






    Out[15]:





0



In [16]:

    
# Emit one `True` line for every row of APP_main whose 'id' already occurred
# in an earlier row; no output means no id is repeated.
for is_repeat in APP_main.duplicated('id'):
    if is_repeat:
        print(is_repeat)

Flavors



In [17]:

    
# Load the four flavor tables from CSV files given by relative path.
fdf, fdf1, fdf2, fdf3 = map(pd.read_csv, [
    'APP_flavors.csv',
    'APP_flavors_1.csv',
    'APP_flavors_2.csv',
    'APP_flavors_3.csv',
])



In [18]:

    
# Print the shape as loaded, then drop 'Unnamed: 0', rename the 'index'
# column to 'id', and preview 3 rows.
# Keyword `axis` and print() keep the cell working on newer pandas / Python 3.
print(fdf.shape)
fdf = fdf.drop('Unnamed: 0', axis=1)
fdf = fdf.rename(columns = {'index':'id'})
fdf.head(3)









    



(500, 8)






    Out[18]:






  
    
      
      id
      bitter
      meaty
      piquant
      salty
      sour
      sweet
    
  
  
    
      0
      3-Ingredient-Campfire-Dip-1708222
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      1
      5-Ingredient-Jalapeno-Cheddar-Dip-1713067
      0.666667
      0.833333
      0.833333
      0.500000
      0.166667
      0.166667
    
    
      2
      5-Ingredient-Salmon-Spread-with-Lemon_-Dill-an...
      0.500000
      0.500000
      0.000000
      0.666667
      0.500000
      0.333333



In [19]:

    
# Print the shape as loaded, drop the 'Unnamed: 0' column, then preview 3 rows.
# The 'index' column is not renamed in this cell.
# Keyword `axis` and print() keep the cell working on newer pandas / Python 3.
print(fdf1.shape)
fdf1 = fdf1.drop('Unnamed: 0', axis=1)
fdf1.head(3)









    



(500, 8)






    Out[19]:






  
    
      
      index
      bitter
      meaty
      piquant
      salty
      sour
      sweet
    
  
  
    
      0
      2-Ingredient-Queso-Dip-1077872
      0.833333
      0.166667
      0.833333
      0.833333
      0.166667
      0.333333
    
    
      1
      3-Ingredient-Candied-Kielbasa-Bites-1689830
      0.833333
      0.333333
      0.166667
      0.833333
      0.833333
      0.666667
    
    
      2
      4-Cheese-Hot-Roasted-Red-Pepper-Dip-439566
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN



In [20]:

    
# Rename the 'index' column to 'id', then show the resulting column labels.
index_to_id = {'index': 'id'}
fdf1 = fdf1.rename(columns=index_to_id)
fdf1.columns









    Out[20]:





Index([u'id', u'bitter', u'meaty', u'piquant', u'salty', u'sour', u'sweet'], dtype='object')



In [21]:

    
# Print the shape as loaded, then drop 'Unnamed: 0', rename the 'index'
# column to 'id', and preview 3 rows.
# Keyword `axis` and print() keep the cell working on newer pandas / Python 3.
print(fdf2.shape)
fdf2 = fdf2.drop('Unnamed: 0', axis=1)
fdf2 = fdf2.rename(columns = {'index':'id'})
fdf2.head(3)









    



(500, 8)






    Out[21]:






  
    
      
      id
      bitter
      meaty
      piquant
      salty
      sour
      sweet
    
  
  
    
      0
      2-Minute-Avocado-Dip-1626429
      0.833333
      0.166667
      0.833333
      0.833333
      0.666667
      0.166667
    
    
      1
      7-Layer-Flag-Dip-for-4th-of-July_-1636222
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      2
      Arugula-and-Chive-Baked-Egg-Cups-1632193
      0.333333
      0.833333
      0.000000
      0.833333
      0.166667
      0.166667



In [22]:

    
# Print the shape as loaded, then drop 'Unnamed: 0', rename the 'index'
# column to 'id', and preview 3 rows.
# Keyword `axis` and print() keep the cell working on newer pandas / Python 3.
print(fdf3.shape)
fdf3 = fdf3.drop('Unnamed: 0', axis=1)
fdf3 = fdf3.rename(columns = {'index':'id'})
fdf3.head(3)









    



(500, 8)






    Out[22]:






  
    
      
      id
      bitter
      meaty
      piquant
      salty
      sour
      sweet
    
  
  
    
      0
      3-Ingredient-Chili-Cheese-Dip-1004579
      0.833333
      0.833333
      0.833333
      0.833333
      0.166667
      0.166667
    
    
      1
      4-Ingredient-Cheese-Ball-1599980
      0.833333
      0.833333
      0.000000
      0.833333
      0.166667
      0.166667
    
    
      2
      4-ingredient-Sweet-_-Spicy-Wings-1225519
      0.166667
      0.833333
      0.666667
      0.166667
      0.166667
      0.500000



In [23]:

    
# Stack the four flavor tables row-wise into a single frame.
flavor_tables = [fdf, fdf1, fdf2, fdf3]
APP_flavors = pd.concat(flavor_tables)



In [24]:

    
#peek at dataframe
# print() with one argument behaves the same on Python 2 and 3.
print(APP_flavors.shape)
APP_flavors.head(3)









    



(2000, 7)






    Out[24]:






  
    
      
      id
      bitter
      meaty
      piquant
      salty
      sour
      sweet
    
  
  
    
      0
      3-Ingredient-Campfire-Dip-1708222
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      1
      5-Ingredient-Jalapeno-Cheddar-Dip-1713067
      0.666667
      0.833333
      0.833333
      0.500000
      0.166667
      0.166667
    
    
      2
      5-Ingredient-Salmon-Spread-with-Lemon_-Dill-an...
      0.500000
      0.500000
      0.000000
      0.666667
      0.500000
      0.333333



In [25]:

    
# Emit one `True` line for every row of APP_flavors whose 'id' already
# occurred in an earlier row; no output means no id is repeated.
for is_repeat in APP_flavors.duplicated('id'):
    if is_repeat:
        print(is_repeat)

Cuisine



In [26]:

    
# Load the four cuisine tables from CSV files given by relative path.
cdf, cdf1, cdf2, cdf3 = map(pd.read_csv, [
    'APP_cuisines.csv',
    'APP_cuisines_1.csv',
    'APP_cuisines_2.csv',
    'APP_cuisines_3.csv',
])



In [27]:

    
# Print the shape as loaded, drop 'Unnamed: 0', rename the 'index' column to
# 'id', then print the column labels and preview 3 rows.
# Keyword `axis` and print() keep the cell working on newer pandas / Python 3.
print(cdf.shape)
cdf = cdf.drop('Unnamed: 0', axis=1)
cdf = cdf.rename(columns = {'index':'id'})
print(cdf.columns)
cdf.head(3)









    



(500, 28)
Index([u'id', u'American', u'Asian', u'Barbecue', u'Cajun & Creole',
       u'Chinese', u'Cuban', u'English', u'French', u'German', u'Greek',
       u'Hawaiian', u'Hungarian', u'Indian', u'Irish', u'Italian', u'Japanese',
       u'Kid-Friendly', u'Mediterranean', u'Mexican', u'Moroccan',
       u'Portuguese', u'Southern & Soul Food', u'Southwestern', u'Spanish',
       u'Swedish', u'Thai'],
      dtype='object')






    Out[27]:






  
    
      
      id
      American
      Asian
      Barbecue
      Cajun & Creole
      Chinese
      Cuban
      English
      French
      German
      ...
      Kid-Friendly
      Mediterranean
      Mexican
      Moroccan
      Portuguese
      Southern & Soul Food
      Southwestern
      Spanish
      Swedish
      Thai
    
  
  
    
      0
      3-Ingredient-Campfire-Dip-1708222
      0
      0
      1
      0
      0
      0
      0
      0
      0
      ...
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      1
      5-Ingredient-Jalapeno-Cheddar-Dip-1713067
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      2
      5-Ingredient-Salmon-Spread-with-Lemon_-Dill-an...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
  

3 rows × 27 columns



In [28]:

    
# Print the shape as loaded, drop 'Unnamed: 0', then print the column labels
# and preview 3 rows. The 'index' column is not renamed in this cell.
# Keyword `axis` and print() keep the cell working on newer pandas / Python 3.
print(cdf1.shape)
cdf1 = cdf1.drop('Unnamed: 0', axis=1)
print(cdf1.columns)
cdf1.head(3)









    



(500, 28)
Index([u'index', u'American', u'Asian', u'Barbecue', u'Cajun & Creole',
       u'Chinese', u'Cuban', u'English', u'French', u'German', u'Greek',
       u'Hawaiian', u'Hungarian', u'Indian', u'Irish', u'Italian', u'Japanese',
       u'Kid-Friendly', u'Mediterranean', u'Mexican', u'Moroccan',
       u'Portuguese', u'Southern & Soul Food', u'Southwestern', u'Spanish',
       u'Swedish', u'Thai'],
      dtype='object')






    Out[28]:






  
    
      
      index
      American
      Asian
      Barbecue
      Cajun & Creole
      Chinese
      Cuban
      English
      French
      German
      ...
      Kid-Friendly
      Mediterranean
      Mexican
      Moroccan
      Portuguese
      Southern & Soul Food
      Southwestern
      Spanish
      Swedish
      Thai
    
  
  
    
      0
      2-Ingredient-Queso-Dip-1077872
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      1
      3-Ingredient-Candied-Kielbasa-Bites-1689830
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      2
      4-Cheese-Hot-Roasted-Red-Pepper-Dip-439566
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
  

3 rows × 27 columns



In [29]:

    
# Rename the 'index' column to 'id', then show the resulting column labels.
index_to_id = {'index': 'id'}
cdf1 = cdf1.rename(columns=index_to_id)
cdf1.columns









    Out[29]:





Index([u'id', u'American', u'Asian', u'Barbecue', u'Cajun & Creole',
       u'Chinese', u'Cuban', u'English', u'French', u'German', u'Greek',
       u'Hawaiian', u'Hungarian', u'Indian', u'Irish', u'Italian', u'Japanese',
       u'Kid-Friendly', u'Mediterranean', u'Mexican', u'Moroccan',
       u'Portuguese', u'Southern & Soul Food', u'Southwestern', u'Spanish',
       u'Swedish', u'Thai'],
      dtype='object')



In [30]:

    
# Print the shape as loaded, drop 'Unnamed: 0', rename the 'index' column to
# 'id', then print the column labels and preview 3 rows.
# Keyword `axis` and print() keep the cell working on newer pandas / Python 3.
print(cdf2.shape)
cdf2 = cdf2.drop('Unnamed: 0', axis=1)
cdf2 = cdf2.rename(columns = {'index':'id'})
print(cdf2.columns)
cdf2.head(3)









    



(500, 28)
Index([u'id', u'American', u'Asian', u'Barbecue', u'Cajun & Creole',
       u'Chinese', u'Cuban', u'English', u'French', u'German', u'Greek',
       u'Hawaiian', u'Hungarian', u'Indian', u'Irish', u'Italian', u'Japanese',
       u'Kid-Friendly', u'Mediterranean', u'Mexican', u'Moroccan',
       u'Portuguese', u'Southern & Soul Food', u'Southwestern', u'Spanish',
       u'Swedish', u'Thai'],
      dtype='object')






    Out[30]:






  
    
      
      id
      American
      Asian
      Barbecue
      Cajun & Creole
      Chinese
      Cuban
      English
      French
      German
      ...
      Kid-Friendly
      Mediterranean
      Mexican
      Moroccan
      Portuguese
      Southern & Soul Food
      Southwestern
      Spanish
      Swedish
      Thai
    
  
  
    
      0
      2-Minute-Avocado-Dip-1626429
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      1
      7-Layer-Flag-Dip-for-4th-of-July_-1636222
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      2
      Arugula-and-Chive-Baked-Egg-Cups-1632193
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
  

3 rows × 27 columns



In [31]:

    
# Print the shape as loaded, drop 'Unnamed: 0', rename the 'index' column to
# 'id', then print the column labels and preview 3 rows.
# Keyword `axis` and print() keep the cell working on newer pandas / Python 3.
print(cdf3.shape)
cdf3 = cdf3.drop('Unnamed: 0', axis=1)
cdf3 = cdf3.rename(columns = {'index':'id'})
print(cdf3.columns)
cdf3.head(3)









    



(500, 28)
Index([u'id', u'American', u'Asian', u'Barbecue', u'Cajun & Creole',
       u'Chinese', u'Cuban', u'English', u'French', u'German', u'Greek',
       u'Hawaiian', u'Hungarian', u'Indian', u'Irish', u'Italian', u'Japanese',
       u'Kid-Friendly', u'Mediterranean', u'Mexican', u'Moroccan',
       u'Portuguese', u'Southern & Soul Food', u'Southwestern', u'Spanish',
       u'Swedish', u'Thai'],
      dtype='object')






    Out[31]:






  
    
      
      id
      American
      Asian
      Barbecue
      Cajun & Creole
      Chinese
      Cuban
      English
      French
      German
      ...
      Kid-Friendly
      Mediterranean
      Mexican
      Moroccan
      Portuguese
      Southern & Soul Food
      Southwestern
      Spanish
      Swedish
      Thai
    
  
  
    
      0
      3-Ingredient-Chili-Cheese-Dip-1004579
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      1
      4-Ingredient-Cheese-Ball-1599980
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      2
      4-ingredient-Sweet-_-Spicy-Wings-1225519
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
  

3 rows × 27 columns



In [32]:

    
# Stack the four cuisine tables row-wise into a single frame.
cuisine_tables = [cdf, cdf1, cdf2, cdf3]
APP_cuisines = pd.concat(cuisine_tables)



In [29]:

    
#peek at dataframe
# print() with one argument behaves the same on Python 2 and 3.
print(APP_cuisines.shape)
APP_cuisines.head(3)









    



(2000, 27)






    Out[29]:






  
    
      
      id
      American
      Asian
      Barbecue
      Cajun & Creole
      Chinese
      Cuban
      English
      French
      German
      ...
      Kid-Friendly
      Mediterranean
      Mexican
      Moroccan
      Portuguese
      Southern & Soul Food
      Southwestern
      Spanish
      Swedish
      Thai
    
  
  
    
      0
      3-Ingredient-Campfire-Dip-1708222
      0
      0
      1
      0
      0
      0
      0
      0
      0
      ...
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      1
      5-Ingredient-Jalapeno-Cheddar-Dip-1713067
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      2
      5-Ingredient-Salmon-Spread-with-Lemon_-Dill-an...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
  

3 rows × 27 columns



In [33]:

    
# Emit one `True` line for every row of APP_cuisines whose 'id' already
# occurred in an earlier row; no output means no id is repeated.
for is_repeat in APP_cuisines.duplicated('id'):
    if is_repeat:
        print(is_repeat)

Details



In [34]:

    
# Load the four details tables from CSV files given by relative path.
ddf, ddf1, ddf2, ddf3 = map(pd.read_csv, [
    'APP_details.csv',
    'APP_details_1.csv',
    'APP_details_2.csv',
    'APP_details_3.csv',
])



In [35]:

    
# Print the shape as loaded, drop 'Unnamed: 0', then print the column labels
# and preview 3 rows.
# Keyword `axis` and print() keep the cell working on newer pandas / Python 3.
print(ddf.shape)
ddf = ddf.drop('Unnamed: 0', axis=1)
print(ddf.columns)
ddf.head(3)









    



(500, 7)
Index([u'id', u'cookTimeInSeconds', u'ingredientCount', u'numberOfServings',
       u'prepTimeInSeconds', u'totalTimeInSeconds'],
      dtype='object')






    Out[35]:






  
    
      
      id
      cookTimeInSeconds
      ingredientCount
      numberOfServings
      prepTimeInSeconds
      totalTimeInSeconds
    
  
  
    
      0
      Bacon-Cheddar-Pinwheels-768341
      NaN
      5
      16
      900.0
      2100
    
    
      1
      Fiesta-Corn-Dip-1711704
      NaN
      7
      4
      900.0
      900
    
    
      2
      Crunchy-Taco-Cups-1684027
      NaN
      5
      12
      NaN
      2400



In [36]:

    
# Print the shape as loaded, drop 'Unnamed: 0', then print the column labels
# and preview 3 rows.
# Keyword `axis` and print() keep the cell working on newer pandas / Python 3.
print(ddf1.shape)
ddf1 = ddf1.drop('Unnamed: 0', axis=1)
print(ddf1.columns)
ddf1.head(3)









    



(500, 7)
Index([u'id', u'cookTimeInSeconds', u'ingredientCount', u'numberOfServings',
       u'prepTimeInSeconds', u'totalTimeInSeconds'],
      dtype='object')






    Out[36]:






  
    
      
      id
      cookTimeInSeconds
      ingredientCount
      numberOfServings
      prepTimeInSeconds
      totalTimeInSeconds
    
  
  
    
      0
      Skinny-Hummus-With-Zero-Tahini-1627413
      NaN
      8
      12
      NaN
      600.0
    
    
      1
      Pico-De-Gallo-1682898
      NaN
      8
      4
      NaN
      1200.0
    
    
      2
      Fiesta-Baked-Cheese-Dip-1696540
      2400.0
      7
      32
      1200.0
      3600.0



In [37]:

    
# Print the shape as loaded, drop 'Unnamed: 0', then print the column labels
# and preview 3 rows.
# Keyword `axis` and print() keep the cell working on newer pandas / Python 3.
print(ddf2.shape)
ddf2 = ddf2.drop('Unnamed: 0', axis=1)
print(ddf2.columns)
ddf2.head(3)









    



(500, 7)
Index([u'id', u'cookTimeInSeconds', u'ingredientCount', u'numberOfServings',
       u'prepTimeInSeconds', u'totalTimeInSeconds'],
      dtype='object')






    Out[37]:






  
    
      
      id
      cookTimeInSeconds
      ingredientCount
      numberOfServings
      prepTimeInSeconds
      totalTimeInSeconds
    
  
  
    
      0
      Warm-Spinach-and-Artichoke-Dip-1222123
      NaN
      9
      4
      NaN
      1800.0
    
    
      1
      Honey-Garlic-Crockpot-Meatballs-1245745
      14400.0
      6
      7
      300.0
      14700.0
    
    
      2
      Cheesy-Spinach-_-Artichoke-Dip-1063719
      NaN
      6
      4
      NaN
      2100.0



In [38]:

    
# Print the shape as loaded, drop 'Unnamed: 0', then print the column labels
# and preview 3 rows.
# Keyword `axis` and print() keep the cell working on newer pandas / Python 3.
print(ddf3.shape)
ddf3 = ddf3.drop('Unnamed: 0', axis=1)
print(ddf3.columns)
ddf3.head(3)









    



(500, 7)
Index([u'id', u'cookTimeInSeconds', u'ingredientCount', u'numberOfServings',
       u'prepTimeInSeconds', u'totalTimeInSeconds'],
      dtype='object')






    Out[38]:






  
    
      
      id
      cookTimeInSeconds
      ingredientCount
      numberOfServings
      prepTimeInSeconds
      totalTimeInSeconds
    
  
  
    
      0
      Pizza-dip-333915
      NaN
      5
      4
      NaN
      2100.0
    
    
      1
      The-Greatest-Hummus-Recipe-Ever_-1663684
      NaN
      7
      5
      900.0
      900.0
    
    
      2
      Garlic-Herb-Cream-Cheese-Dip-972926
      NaN
      6
      4
      NaN
      900.0



In [39]:

    
# Stack the four details tables row-wise into a single frame.
detail_tables = [ddf, ddf1, ddf2, ddf3]
APP_details = pd.concat(detail_tables)



In [40]:

    
#peek at dataframe
# print() with one argument behaves the same on Python 2 and 3.
print(APP_details.shape)
APP_details.head(3)









    



(2000, 6)






    Out[40]:






  
    
      
      id
      cookTimeInSeconds
      ingredientCount
      numberOfServings
      prepTimeInSeconds
      totalTimeInSeconds
    
  
  
    
      0
      Bacon-Cheddar-Pinwheels-768341
      NaN
      5
      16
      900.0
      2100.0
    
    
      1
      Fiesta-Corn-Dip-1711704
      NaN
      7
      4
      900.0
      900.0
    
    
      2
      Crunchy-Taco-Cups-1684027
      NaN
      5
      12
      NaN
      2400.0



In [41]:

    
# Emit one `True` line for every row of APP_details whose 'id' already
# occurred in an earlier row; no output means no id is repeated.
for is_repeat in APP_details.duplicated('id'):
    if is_repeat:
        print(is_repeat)

Ingredients



In [42]:

    
# Load the four ingredient tables from CSV files given by relative path.
idf, idf1, idf2, idf3 = map(pd.read_csv, [
    'APP_ingredients.csv',
    'APP_ingredients_1.csv',
    'APP_ingredients_2.csv',
    'APP_ingredients_3.csv',
])



In [43]:

    
# Print the shape as loaded, drop 'Unnamed: 0', then print the column labels
# and preview 3 rows.
# Keyword `axis` and print() keep the cell working on newer pandas / Python 3.
print(idf.shape)
idf = idf.drop('Unnamed: 0', axis=1)
print(idf.columns)
idf.head(3)









    



(500, 758)
Index([u'id', u'course', u'(10 oz.) frozen spinach', u'agave nectar',
       u'aged balsamic vinegar', u'aioli', u'all-purpose flour', u'allspice',
       u'allspice berries', u'almonds',
       ...
       u'wonton wrappers', u'worcestershire sauce', u'yellow bell pepper',
       u'yellow corn', u'yellow mustard', u'yellow onion', u'yellow peppers',
       u'yoghurt', u'yoplait', u'zucchini'],
      dtype='object', length=757)






    Out[43]:






  
    
      
      id
      course
      (10 oz.) frozen spinach
      agave nectar
      aged balsamic vinegar
      aioli
      all-purpose flour
      allspice
      allspice berries
      almonds
      ...
      wonton wrappers
      worcestershire sauce
      yellow bell pepper
      yellow corn
      yellow mustard
      yellow onion
      yellow peppers
      yoghurt
      yoplait
      zucchini
    
  
  
    
      0
      Bacon-Cheddar-Pinwheels-768341
      Breakfast and Brunch
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      1
      Fiesta-Corn-Dip-1711704
      Breakfast and Brunch
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      2
      Crunchy-Taco-Cups-1684027
      Breakfast and Brunch
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
  

3 rows × 757 columns



In [44]:

    
# Print the shape as loaded, drop 'Unnamed: 0', then print the column labels
# and preview 3 rows.
# Keyword `axis` and print() keep the cell working on newer pandas / Python 3.
print(idf1.shape)
idf1 = idf1.drop('Unnamed: 0', axis=1)
print(idf1.columns)
idf1.head(3)









    



(500, 738)
Index([u'id', u'course', u'accent', u'adobo', u'adobo sauce',
       u'aged balsamic vinegar', u'albacore', u'ale', u'all-purpose flour',
       u'almonds',
       ...
       u'wish bone ranch dress', u'wish-bone light buffalo ranch dressing',
       u'wonton wrappers', u'worcestershire sauce', u'yellow corn',
       u'yellow mustard', u'yellow onion', u'yellow peppers', u'yoghurt',
       u'zucchini'],
      dtype='object', length=737)






    Out[44]:






  
    
      
      id
      course
      accent
      adobo
      adobo sauce
      aged balsamic vinegar
      albacore
      ale
      all-purpose flour
      almonds
      ...
      wish bone ranch dress
      wish-bone light buffalo ranch dressing
      wonton wrappers
      worcestershire sauce
      yellow corn
      yellow mustard
      yellow onion
      yellow peppers
      yoghurt
      zucchini
    
  
  
    
      0
      Skinny-Hummus-With-Zero-Tahini-1627413
      Breakfast and Brunch
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      1
      Pico-De-Gallo-1682898
      Breakfast and Brunch
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      2
      Fiesta-Baked-Cheese-Dip-1696540
      Breakfast and Brunch
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
  

3 rows × 737 columns



In [45]:

    
# Print the shape as loaded, drop 'Unnamed: 0', then print the column labels
# and preview 3 rows.
# Keyword `axis` and print() keep the cell working on newer pandas / Python 3.
print(idf2.shape)
idf2 = idf2.drop('Unnamed: 0', axis=1)
print(idf2.columns)
idf2.head(3)









    



(500, 746)
Index([u'id', u'course', u'(10 oz.) frozen spinach', u'adobo sauce',
       u'agave nectar', u'all-purpose flour', u'almond flour', u'almond milk',
       u'almonds', u'american cheese',
       ...
       u'worcestershire sauce', u'yellow mustard', u'yellow onion',
       u'yellow peppers', u'yoghurt', u'yoghurt natural low fat', u'za'atar',
       u'zesty italian dressing', u'zucchini', u'zucchini blossoms'],
      dtype='object', length=745)






    Out[45]:






  
    
      
      id
      course
      (10 oz.) frozen spinach
      adobo sauce
      agave nectar
      all-purpose flour
      almond flour
      almond milk
      almonds
      american cheese
      ...
      worcestershire sauce
      yellow mustard
      yellow onion
      yellow peppers
      yoghurt
      yoghurt natural low fat
      za'atar
      zesty italian dressing
      zucchini
      zucchini blossoms
    
  
  
    
      0
      Warm-Spinach-and-Artichoke-Dip-1222123
      Breakfast and Brunch
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      1
      Honey-Garlic-Crockpot-Meatballs-1245745
      Breakfast and Brunch
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      2
      Cheesy-Spinach-_-Artichoke-Dip-1063719
      Breakfast and Brunch
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
  

3 rows × 745 columns



In [46]:

    
# Print the shape as loaded, drop 'Unnamed: 0', then print the column labels
# and preview the first rows (head() with its default row count).
# Keyword `axis` and print() keep the cell working on newer pandas / Python 3.
print(idf3.shape)
idf3 = idf3.drop('Unnamed: 0', axis=1)
print(idf3.columns)
idf3.head()









    



(500, 708)
Index([u'id', u'course', u'adobo', u'agave nectar', u'aged cheddar cheese',
       u'all-purpose flour', u'almond flour', u'almond milk', u'almonds',
       u'american cheese',
       ...
       u'wonton wrappers', u'worcestershire sauce', u'yellow mustard',
       u'yellow onion', u'yoghurt', u'yoplait', u'yukon gold potatoes',
       u'za'atar', u'zesty italian dressing', u'zucchini'],
      dtype='object', length=707)






    Out[46]:






  
    
      
      id
      course
      adobo
      agave nectar
      aged cheddar cheese
      all-purpose flour
      almond flour
      almond milk
      almonds
      american cheese
      ...
      wonton wrappers
      worcestershire sauce
      yellow mustard
      yellow onion
      yoghurt
      yoplait
      yukon gold potatoes
      za'atar
      zesty italian dressing
      zucchini
    
  
  
    
      0
      Pizza-dip-333915
      Breakfast and Brunch
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      1
      The-Greatest-Hummus-Recipe-Ever_-1663684
      Breakfast and Brunch
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      2
      Garlic-Herb-Cream-Cheese-Dip-972926
      Breakfast and Brunch
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      3
      Frank_s-RedHot-Buffalo-Chicken-Dip-1089684
      Breakfast and Brunch
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      4
      Marinated-Japanese-Mushrooms-1102960
      Breakfast and Brunch
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
  

5 rows × 707 columns



In [57]:

    
# Stack the four ingredient tables row-wise into a single frame.
ingredient_tables = [idf, idf1, idf2, idf3]
APP_ing = pd.concat(ingredient_tables)
# Two-column selection kept for the reduced dataset.
# NOTE(review): 'ingredient_list' is not visible in the truncated column
# listings printed for the ingredient tables -- confirm the column exists.
reduced_columns = ['id', 'ingredient_list']
APP_ing_reduced = APP_ing[reduced_columns]



In [59]:

    
# Make 'id' the first column; all other columns keep their relative order.
cols = list(APP_ing)
cols.insert(0, cols.pop(cols.index('id')))
# `cols` holds column labels, so label-based .loc is the right indexer here;
# it replaces .ix, which pandas deprecated and later removed.
APP_ing = APP_ing.loc[:, cols]



In [60]:

    
# Preview the first three rows of the combined ingredient table.
APP_ing.head(n=3)









    Out[60]:






  
    
      
      id
      (10 oz.) frozen spinach
      accent
      adobo
      adobo sauce
      agave nectar
      aged balsamic vinegar
      aged cheddar cheese
      aioli
      albacore
      ...
      yellow onion
      yellow peppers
      yoghurt
      yoghurt natural low fat
      yoplait
      yukon gold potatoes
      za'atar
      zesty italian dressing
      zucchini
      zucchini blossoms
    
  
  
    
      0
      Bacon-Cheddar-Pinwheels-768341
      0.0
      NaN
      NaN
      NaN
      0.0
      0.0
      NaN
      0.0
      NaN
      ...
      0.0
      0.0
      0.0
      NaN
      0.0
      NaN
      NaN
      NaN
      0.0
      NaN
    
    
      1
      Fiesta-Corn-Dip-1711704
      0.0
      NaN
      NaN
      NaN
      0.0
      0.0
      NaN
      0.0
      NaN
      ...
      0.0
      0.0
      0.0
      NaN
      0.0
      NaN
      NaN
      NaN
      0.0
      NaN
    
    
      2
      Crunchy-Taco-Cups-1684027
      0.0
      NaN
      NaN
      NaN
      0.0
      0.0
      NaN
      0.0
      NaN
      ...
      0.0
      0.0
      0.0
      NaN
      0.0
      NaN
      NaN
      NaN
      0.0
      NaN
    
  

3 rows × 1438 columns



In [61]:

    
# Emit one `True` line for every row of APP_ing whose 'id' already occurred
# in an earlier row; no output means no id is repeated.
for is_repeat in APP_ing.duplicated('id'):
    if is_repeat:
        print(is_repeat)

Join all tables for Appetizers



In [62]:

    
# set index to column 'id'
_df = [APP_main, APP_main_reduced, APP_cuisines, APP_flavors, APP_details, APP_ing, APP_ing_reduced]

# The loop variable is deliberately not named `df`, so the frame bound to `df`
# earlier in the notebook is not rebound to the last element of the list.
for frame in _df:
    # Frames already indexed by 'id' are skipped so the cell can be re-run;
    # a frame lacking 'id' altogether still raises, as before.
    if frame.index.name != 'id':
        frame.set_index('id', inplace = True)



In [63]:

    
# Join the other tables onto the main table (DataFrame.join matches rows on the index).
full_parts = [APP_cuisines, APP_flavors, APP_details, APP_ing]
reduced_parts = [APP_flavors, APP_details, APP_ing_reduced]
APP_data = APP_main.join(full_parts)
APP_data_reduced = APP_main_reduced.join(reduced_parts)
# Label every row of both results with the course name. The ingredient tables
# list a 'course' column of their own, which this assignment overwrites in APP_data.
for joined in (APP_data, APP_data_reduced):
    joined['course'] = 'Appetizer'



In [64]:

    
# A cell only displays its last expression, so the shape has to be printed
# explicitly to be visible (as the other "peek" cells do).
print(APP_data.shape)
APP_data.head(3)









    Out[64]:






  
    
      
      rating
      recipeName
      sourceDisplayName
      American
      Asian
      Barbecue
      Cajun & Creole
      Chinese
      Cuban
      English
      ...
      yellow onion
      yellow peppers
      yoghurt
      yoghurt natural low fat
      yoplait
      yukon gold potatoes
      za'atar
      zesty italian dressing
      zucchini
      zucchini blossoms
    
    
      id
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      Bacon-Cheddar-Pinwheels-768341
      4
      Bacon-Cheddar Pinwheels
      NaN
      0
      0
      0
      0
      0
      0
      0
      ...
      0.0
      0.0
      0.0
      NaN
      0.0
      NaN
      NaN
      NaN
      0.0
      NaN
    
    
      Fiesta-Corn-Dip-1711704
      4
      Fiesta Corn Dip
      The Hill Hangout
      0
      0
      0
      0
      0
      0
      0
      ...
      0.0
      0.0
      0.0
      NaN
      0.0
      NaN
      NaN
      NaN
      0.0
      NaN
    
    
      Crunchy-Taco-Cups-1684027
      4
      Crunchy Taco Cups
      Kevin and Amanda
      0
      0
      0
      0
      0
      0
      0
      ...
      0.0
      0.0
      0.0
      NaN
      0.0
      NaN
      NaN
      NaN
      0.0
      NaN
    
  

3 rows × 1477 columns



In [67]:

    
# Persist both joined appetizer tables to CSV in the working directory.
# index=True (the pandas default) writes the frame's index as the first column.
APP_data.to_csv(path_or_buf='APP_data.csv', index=True)
APP_data_reduced.to_csv(path_or_buf='APP_data_reduced.csv', index=True)

	Unnamed: 0	id	rating	recipeName	sourceDisplayName
0	0	Pizza-dip-333915	5	Pizza Dip	How Sweet It Is
1	1	The-Greatest-Hummus-Recipe-Ever_-1663684	4	The Greatest Hummus Recipe Ever!	Gluten Free Yummy
2	2	Garlic-Herb-Cream-Cheese-Dip-972926	4	Garlic Herb Cream Cheese Dip	Cooking On A Budget
3	3	Frank_s-RedHot-Buffalo-Chicken-Dip-1089684	3	Frank's RedHot Buffalo Chicken Dip	Frank's Red Hot
4	4	Marinated-Japanese-Mushrooms-1102960	4	Marinated Japanese Mushrooms	No Recipes

	id	rating	recipeName	sourceDisplayName
0	Skinny-Hummus-With-Zero-Tahini-1627413	4	Skinny Hummus With Zero Tahini	The Kitchen Girl
1	Pico-De-Gallo-1682898	4	Pico De Gallo	The Girl Who Ate Everything
2	Fiesta-Baked-Cheese-Dip-1696540	3	Fiesta Baked Cheese Dip	Kraft

	id	rating	recipeName	sourceDisplayName
0	Warm-Spinach-and-Artichoke-Dip-1222123	4	Warm Spinach and Artichoke Dip	Deliciously Ella
1	Honey-Garlic-Crockpot-Meatballs-1245745	4	Honey Garlic Crockpot Meatballs	Family Fresh Meals
2	Cheesy-Spinach-_-Artichoke-Dip-1063719	4	Cheesy Spinach & Artichoke Dip	Shibley Smiles

	id	bitter	meaty	piquant	salty	sour	sweet
0	3-Ingredient-Campfire-Dip-1708222	NaN	NaN	NaN	NaN	NaN	NaN
1	5-Ingredient-Jalapeno-Cheddar-Dip-1713067	0.666667	0.833333	0.833333	0.500000	0.166667	0.166667
2	5-Ingredient-Salmon-Spread-with-Lemon_-Dill-an...	0.500000	0.500000	0.000000	0.666667	0.500000	0.333333

	index	bitter	meaty	piquant	salty	sour	sweet
0	2-Ingredient-Queso-Dip-1077872	0.833333	0.166667	0.833333	0.833333	0.166667	0.333333
1	3-Ingredient-Candied-Kielbasa-Bites-1689830	0.833333	0.333333	0.166667	0.833333	0.833333	0.666667
2	4-Cheese-Hot-Roasted-Red-Pepper-Dip-439566	NaN	NaN	NaN	NaN	NaN	NaN

	id	bitter	meaty	piquant	salty	sour	sweet
0	2-Minute-Avocado-Dip-1626429	0.833333	0.166667	0.833333	0.833333	0.666667	0.166667
1	7-Layer-Flag-Dip-for-4th-of-July_-1636222	NaN	NaN	NaN	NaN	NaN	NaN
2	Arugula-and-Chive-Baked-Egg-Cups-1632193	0.333333	0.833333	0.000000	0.833333	0.166667	0.166667

	id	bitter	meaty	piquant	salty	sour	sweet
0	3-Ingredient-Chili-Cheese-Dip-1004579	0.833333	0.833333	0.833333	0.833333	0.166667	0.166667
1	4-Ingredient-Cheese-Ball-1599980	0.833333	0.833333	0.000000	0.833333	0.166667	0.166667
2	4-ingredient-Sweet-_-Spicy-Wings-1225519	0.166667	0.833333	0.666667	0.166667	0.166667	0.500000

	id	cookTimeInSeconds	ingredientCount	numberOfServings	prepTimeInSeconds	totalTimeInSeconds
0	Bacon-Cheddar-Pinwheels-768341	NaN	5	16	900.0	2100
1	Fiesta-Corn-Dip-1711704	NaN	7	4	900.0	900
2	Crunchy-Taco-Cups-1684027	NaN	5	12	NaN	2400

	id	cookTimeInSeconds	ingredientCount	numberOfServings	prepTimeInSeconds	totalTimeInSeconds
0	Skinny-Hummus-With-Zero-Tahini-1627413	NaN	8	12	NaN	600.0
1	Pico-De-Gallo-1682898	NaN	8	4	NaN	1200.0
2	Fiesta-Baked-Cheese-Dip-1696540	2400.0	7	32	1200.0	3600.0

	id	cookTimeInSeconds	ingredientCount	numberOfServings	prepTimeInSeconds	totalTimeInSeconds
0	Warm-Spinach-and-Artichoke-Dip-1222123	NaN	9	4	NaN	1800.0
1	Honey-Garlic-Crockpot-Meatballs-1245745	14400.0	6	7	300.0	14700.0
2	Cheesy-Spinach-_-Artichoke-Dip-1063719	NaN	6	4	NaN	2100.0

	id	course	...	wonton wrappers
0	Bacon-Cheddar-Pinwheels-768341	Breakfast and Brunch	...	0.0
1	Fiesta-Corn-Dip-1711704	Breakfast and Brunch	...	0.0
2	Crunchy-Taco-Cups-1684027	Breakfast and Brunch	...	1.0

	rating	recipeName	sourceDisplayName	American	Asian	Barbecue	Cajun & Creole	Chinese	Cuban	English	...	yellow onion	yellow peppers	yoghurt	yoghurt natural low fat	yoplait	yukon gold potatoes	za'atar	zesty italian dressing	zucchini	zucchini blossoms
id
Bacon-Cheddar-Pinwheels-768341	4	Bacon-Cheddar Pinwheels	NaN	0	0	0	0	0	0	0	...	0.0	0.0	0.0	NaN	0.0	NaN	NaN	NaN	0.0	NaN
Fiesta-Corn-Dip-1711704	4	Fiesta Corn Dip	The Hill Hangout	0	0	0	0	0	0	0	...	0.0	0.0	0.0	NaN	0.0	NaN	NaN	NaN	0.0	NaN
Crunchy-Taco-Cups-1684027	4	Crunchy Taco Cups	Kevin and Amanda	0	0	0	0	0	0	0	...	0.0	0.0	0.0	NaN	0.0	NaN	NaN	NaN	0.0	NaN