notebook.community

Edit and run



In [1]:

    
import pandas as pd
import numpy as np



In [59]:

    
df = pd.read_csv('DS_main.csv')
df1 = pd.read_csv('DS_main_1.csv')
df2 = pd.read_csv('DS_main_2.csv')
df3 = pd.read_csv('DS_main_3.csv')



In [60]:

    
print df.shape
df.head(3)









    



(500, 5)






    Out[60]:






  
    
      
      Unnamed: 0
      id
      rating
      recipeName
      sourceDisplayName
    
  
  
    
      0
      0
      Easy_-No-Bake_-Mud-Pie-1708171
      4
      Easy, No Bake, Mud Pie
      Ann's Entitled Life
    
    
      1
      1
      Tita_s-Fruit-Dessert-_SundaySupper-1706994
      4
      Tita's Fruit Dessert #SundaySupper
      Basic N Delicious
    
    
      2
      2
      No-Bake-Oreo-Cheesecake-1710284
      3
      No Bake Oreo Cheesecake
      Cook and Share



In [61]:

    
df = df.drop('Unnamed: 0', 1)



In [62]:

    
print df1.shape
df1 = df1.drop('Unnamed: 0', 1)
df1.head(3)









    



(500, 5)






    Out[62]:






  
    
      
      id
      rating
      recipeName
      sourceDisplayName
    
  
  
    
      0
      Lemonade-Popsicles-1682120
      3
      Lemonade Popsicles
      Beachbody
    
    
      1
      Best-Peanut-Butter-Buttercream-Frosting-1682977
      4
      Best Peanut Butter Buttercream Frosting
      Two Sisters Crafting
    
    
      2
      Omega-Chocolate-Bars-1695745
      4
      Omega Chocolate Bars
      A Healthy Life For Me



In [63]:

    
print df2.shape
df2 = df2.drop('Unnamed: 0', 1)
df2.head(3)









    



(500, 5)






    Out[63]:






  
    
      
      id
      rating
      recipeName
      sourceDisplayName
    
  
  
    
      0
      Raw-Brownies-With-Frosting-1707657
      4
      Raw Brownies With Frosting
      Planticize
    
    
      1
      Easy-Cinnamon-Roll-Cake-1552842
      4
      Easy Cinnamon Roll Cake
      Cookies & Cups
    
    
      2
      Coconut-Ice-Cream-1711710
      4
      Coconut Ice Cream
      Get Inspired Everyday!



In [64]:

    
print df3.shape
df3 = df3.drop('Unnamed: 0', 1)
df3.head(3)









    



(500, 5)






    Out[64]:






  
    
      
      id
      rating
      recipeName
      sourceDisplayName
    
  
  
    
      0
      Snickers-Chocolate-Cake-Mix-Bars-1710321
      4
      Snickers Chocolate Cake Mix Bars
      Kleinworth Co.
    
    
      1
      Mango-Coconut-Popsicles-1699009
      4
      Mango Coconut Popsicles
      Eat Good 4 Life
    
    
      2
      Man-Bars-1703874
      4
      Man Bars
      South your mouth



In [65]:

    
#concatenate the main tables. 
DS_main= pd.concat([df, df1, df2, df3])
#create a new dataframe with selected columns
DS_main_reduced = DS_main.drop(['recipeName', 'sourceDisplayName'], axis = 1)



In [66]:

    
#peek at dataframe
print DS_main.shape
DS_main.head(3)









    



(2000, 4)






    Out[66]:






  
    
      
      id
      rating
      recipeName
      sourceDisplayName
    
  
  
    
      0
      Easy_-No-Bake_-Mud-Pie-1708171
      4
      Easy, No Bake, Mud Pie
      Ann's Entitled Life
    
    
      1
      Tita_s-Fruit-Dessert-_SundaySupper-1706994
      4
      Tita's Fruit Dessert #SundaySupper
      Basic N Delicious
    
    
      2
      No-Bake-Oreo-Cheesecake-1710284
      3
      No Bake Oreo Cheesecake
      Cook and Share



In [67]:

    
for i in DS_main.duplicated('id'):
    if i == True:
        print i
DS_main = DS_main.drop_duplicates('id')
DS_main.shape









    



True






    Out[67]:





(1999, 4)

Flavors



In [68]:

    
fdf = pd.read_csv('DS_flavors.csv')
fdf1 = pd.read_csv('DS_flavors_1.csv')
fdf2 = pd.read_csv('DS_flavors_2.csv')
fdf3 = pd.read_csv('DS_flavors_3.csv')



In [69]:

    
print fdf.shape
fdf = fdf.drop('Unnamed: 0', 1)
fdf = fdf.rename(columns = {'index':'id'})
fdf.head(3)









    



(500, 8)






    Out[69]:






  
    
      
      id
      bitter
      meaty
      piquant
      salty
      sour
      sweet
    
  
  
    
      0
      -No-Bake-Caramel-Cheesecake-Shooters-1706735
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      1
      1-2-3-4-Cupcakes-760182
      0.166667
      0.333333
      0.0
      0.333333
      0.166667
      0.833333
    
    
      2
      1-Minute-Chocolate-Frosting-1690971
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN



In [70]:

    
print fdf1.shape
fdf1 = fdf1.drop('Unnamed: 0', 1)
fdf1 = fdf1.rename(columns = {'index':'id'})
fdf1.head(3)









    



(500, 8)






    Out[70]:






  
    
      
      id
      bitter
      meaty
      piquant
      salty
      sour
      sweet
    
  
  
    
      0
      10-Layer-Chocolate-Caramel-Mousse-Cake-1697890
      0.166667
      0.166667
      0.0
      0.5
      0.166667
      0.833333
    
    
      1
      2-Bite-Cherry-Pies-1709499
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      2
      3-Ingredient-Brownies-1703671
      0.166667
      0.166667
      0.0
      0.0
      0.166667
      0.833333



In [71]:

    
print fdf2.shape
fdf2 = fdf2.drop('Unnamed: 0', 1)
fdf2 = fdf2.rename(columns = {'index':'id'})
fdf2.head(3)









    



(500, 8)






    Out[71]:






  
    
      
      id
      bitter
      meaty
      piquant
      salty
      sour
      sweet
    
  
  
    
      0
      1-Minute-Sugar-Free-Peanut-Butter-Mug-Cake-169...
      0.833333
      0.833333
      0.0
      0.833333
      0.166667
      0.333333
    
    
      1
      10-Minute-Strawberry-Jam-1702695
      0.000000
      0.166667
      0.0
      0.000000
      0.833333
      0.666667
    
    
      2
      1_2-Syn-Strawberry-Mousse-1703461
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN



In [72]:

    
print fdf3.shape
fdf3 = fdf3.drop('Unnamed: 0', 1)
fdf3 = fdf3.rename(columns = {'index':'id'})
fdf3.head(3)









    



(500, 8)






    Out[72]:






  
    
      
      id
      bitter
      meaty
      piquant
      salty
      sour
      sweet
    
  
  
    
      0
      100-Calorie-Chocolate-Peanut-Butter-Squares-15...
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      1
      15-Minute-Strawberry-Jam-1683439
      0.0
      0.166667
      0.0
      0.0
      0.833333
      0.666667
    
    
      2
      1_-Classic-Chocolate-Cake-1708247
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN



In [73]:

    
#concatenate the flavor tables. 
DS_flavors= pd.concat([fdf, fdf1, fdf2, fdf3])



In [74]:

    
#peek at dataframe
print DS_flavors.shape
DS_flavors.head(3)









    



(2000, 7)






    Out[74]:






  
    
      
      id
      bitter
      meaty
      piquant
      salty
      sour
      sweet
    
  
  
    
      0
      -No-Bake-Caramel-Cheesecake-Shooters-1706735
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      1
      1-2-3-4-Cupcakes-760182
      0.166667
      0.333333
      0.0
      0.333333
      0.166667
      0.833333
    
    
      2
      1-Minute-Chocolate-Frosting-1690971
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN



In [75]:

    
for i in DS_flavors.duplicated('id'):
    if i == True:
        print i
DS_flavors = DS_flavors.drop_duplicates('id')
DS_flavors.shape









    



True






    Out[75]:





(1999, 7)

Cuisine



In [76]:

    
cdf = pd.read_csv('DS_cuisines.csv')
cdf1 = pd.read_csv('DS_cuisines_1.csv')
cdf2 = pd.read_csv('DS_cuisines_2.csv')
cdf3 = pd.read_csv('DS_cuisines_3.csv')



In [77]:

    
print cdf.shape
cdf = cdf.drop('Unnamed: 0', 1)
cdf = cdf.rename(columns = {'index':'id'})
print cdf.columns
cdf.head(3)









    



(500, 28)
Index([u'id', u'American', u'Asian', u'Barbecue', u'Cajun & Creole',
       u'Chinese', u'Cuban', u'English', u'French', u'German', u'Greek',
       u'Hawaiian', u'Hungarian', u'Indian', u'Irish', u'Italian', u'Japanese',
       u'Kid-Friendly', u'Mediterranean', u'Mexican', u'Moroccan',
       u'Portuguese', u'Southern & Soul Food', u'Southwestern', u'Spanish',
       u'Swedish', u'Thai'],
      dtype='object')






    Out[77]:






  
    
      
      id
      American
      Asian
      Barbecue
      Cajun & Creole
      Chinese
      Cuban
      English
      French
      German
      ...
      Kid-Friendly
      Mediterranean
      Mexican
      Moroccan
      Portuguese
      Southern & Soul Food
      Southwestern
      Spanish
      Swedish
      Thai
    
  
  
    
      0
      -No-Bake-Caramel-Cheesecake-Shooters-1706735
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      1
      1-2-3-4-Cupcakes-760182
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      2
      1-Minute-Chocolate-Frosting-1690971
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
  

3 rows × 27 columns



In [78]:

    
print cdf1.shape
cdf1 = cdf1.drop('Unnamed: 0', 1)
cdf1 = cdf1.rename(columns = {'index':'id'})
print cdf1.columns
cdf1.head(3)









    



(500, 28)
Index([u'id', u'American', u'Asian', u'Barbecue', u'Cajun & Creole',
       u'Chinese', u'Cuban', u'English', u'French', u'German', u'Greek',
       u'Hawaiian', u'Hungarian', u'Indian', u'Irish', u'Italian', u'Japanese',
       u'Kid-Friendly', u'Mediterranean', u'Mexican', u'Moroccan',
       u'Portuguese', u'Southern & Soul Food', u'Southwestern', u'Spanish',
       u'Swedish', u'Thai'],
      dtype='object')






    Out[78]:






  
    
      
      id
      American
      Asian
      Barbecue
      Cajun & Creole
      Chinese
      Cuban
      English
      French
      German
      ...
      Kid-Friendly
      Mediterranean
      Mexican
      Moroccan
      Portuguese
      Southern & Soul Food
      Southwestern
      Spanish
      Swedish
      Thai
    
  
  
    
      0
      10-Layer-Chocolate-Caramel-Mousse-Cake-1697890
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      1
      2-Bite-Cherry-Pies-1709499
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      1
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      2
      3-Ingredient-Brownies-1703671
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
  

3 rows × 27 columns



In [79]:

    
print cdf2.shape
cdf2 = cdf2.drop('Unnamed: 0', 1)
cdf2 = cdf2.rename(columns = {'index':'id'})
print cdf2.columns
cdf2.head(3)









    



(500, 28)
Index([u'id', u'American', u'Asian', u'Barbecue', u'Cajun & Creole',
       u'Chinese', u'Cuban', u'English', u'French', u'German', u'Greek',
       u'Hawaiian', u'Hungarian', u'Indian', u'Irish', u'Italian', u'Japanese',
       u'Kid-Friendly', u'Mediterranean', u'Mexican', u'Moroccan',
       u'Portuguese', u'Southern & Soul Food', u'Southwestern', u'Spanish',
       u'Swedish', u'Thai'],
      dtype='object')






    Out[79]:






  
    
      
      id
      American
      Asian
      Barbecue
      Cajun & Creole
      Chinese
      Cuban
      English
      French
      German
      ...
      Kid-Friendly
      Mediterranean
      Mexican
      Moroccan
      Portuguese
      Southern & Soul Food
      Southwestern
      Spanish
      Swedish
      Thai
    
  
  
    
      0
      1-Minute-Sugar-Free-Peanut-Butter-Mug-Cake-169...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      1
      10-Minute-Strawberry-Jam-1702695
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      2
      1_2-Syn-Strawberry-Mousse-1703461
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
  

3 rows × 27 columns



In [80]:

    
print cdf3.shape
cdf3 = cdf3.drop('Unnamed: 0', 1)
cdf3 = cdf3.rename(columns = {'index':'id'})
print cdf3.columns
cdf3.head(3)









    



(500, 28)
Index([u'id', u'American', u'Asian', u'Barbecue', u'Cajun & Creole',
       u'Chinese', u'Cuban', u'English', u'French', u'German', u'Greek',
       u'Hawaiian', u'Hungarian', u'Indian', u'Irish', u'Italian', u'Japanese',
       u'Kid-Friendly', u'Mediterranean', u'Mexican', u'Moroccan',
       u'Portuguese', u'Southern & Soul Food', u'Southwestern', u'Spanish',
       u'Swedish', u'Thai'],
      dtype='object')






    Out[80]:






  
    
      
      id
      American
      Asian
      Barbecue
      Cajun & Creole
      Chinese
      Cuban
      English
      French
      German
      ...
      Kid-Friendly
      Mediterranean
      Mexican
      Moroccan
      Portuguese
      Southern & Soul Food
      Southwestern
      Spanish
      Swedish
      Thai
    
  
  
    
      0
      100-Calorie-Chocolate-Peanut-Butter-Squares-15...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      1
      15-Minute-Strawberry-Jam-1683439
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      2
      1_-Classic-Chocolate-Cake-1708247
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
  

3 rows × 27 columns



In [81]:

    
#concatenate the cuisines tables. 
DS_cuisines= pd.concat([cdf, cdf1, cdf2, cdf3])



In [82]:

    
#peek at dataframe
print DS_cuisines.shape
DS_cuisines.head(3)









    



(2000, 27)






    Out[82]:






  
    
      
      id
      American
      Asian
      Barbecue
      Cajun & Creole
      Chinese
      Cuban
      English
      French
      German
      ...
      Kid-Friendly
      Mediterranean
      Mexican
      Moroccan
      Portuguese
      Southern & Soul Food
      Southwestern
      Spanish
      Swedish
      Thai
    
  
  
    
      0
      -No-Bake-Caramel-Cheesecake-Shooters-1706735
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      1
      1-2-3-4-Cupcakes-760182
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      2
      1-Minute-Chocolate-Frosting-1690971
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
  

3 rows × 27 columns



In [83]:

    
for i in DS_cuisines.duplicated('id'):
    if i == True:
        print i
DS_cuisines = DS_cuisines.drop_duplicates('id')
DS_cuisines.shape









    



True






    Out[83]:





(1999, 27)

Details



In [84]:

    
ddf = pd.read_csv('DS_details.csv')
ddf1 = pd.read_csv('DS_details_1.csv')
ddf2 = pd.read_csv('DS_details_2.csv')
ddf3 = pd.read_csv('DS_details_3.csv')



In [85]:

    
print ddf.shape
ddf = ddf.drop('Unnamed: 0', 1)
print ddf.columns
ddf.head(3)









    



(500, 7)
Index([u'id', u'cookTimeInSeconds', u'ingredientCount', u'numberOfServings',
       u'prepTimeInSeconds', u'totalTimeInSeconds'],
      dtype='object')






    Out[85]:






  
    
      
      id
      cookTimeInSeconds
      ingredientCount
      numberOfServings
      prepTimeInSeconds
      totalTimeInSeconds
    
  
  
    
      0
      Easy_-No-Bake_-Mud-Pie-1708171
      11700.0
      5
      6
      900.0
      12600
    
    
      1
      Tita_s-Fruit-Dessert-_SundaySupper-1706994
      NaN
      6
      4
      900.0
      900
    
    
      2
      No-Bake-Oreo-Cheesecake-1710284
      NaN
      6
      4
      1800.0
      1800



In [86]:

    
print ddf1.shape
ddf1 = ddf1.drop('Unnamed: 0', 1)
print ddf1.columns
ddf1.head(3)









    



(500, 7)
Index([u'id', u'cookTimeInSeconds', u'ingredientCount', u'numberOfServings',
       u'prepTimeInSeconds', u'totalTimeInSeconds'],
      dtype='object')






    Out[86]:






  
    
      
      id
      cookTimeInSeconds
      ingredientCount
      numberOfServings
      prepTimeInSeconds
      totalTimeInSeconds
    
  
  
    
      0
      Lemonade-Popsicles-1682120
      NaN
      3
      4.0
      600.0
      15000
    
    
      1
      Best-Peanut-Butter-Buttercream-Frosting-1682977
      NaN
      5
      4.0
      NaN
      1200
    
    
      2
      Omega-Chocolate-Bars-1695745
      120.0
      5
      25.0
      1080.0
      1200



In [87]:

    
print ddf2.shape
ddf2 = ddf2.drop('Unnamed: 0', 1)
print ddf2.columns
ddf2.head(3)









    



(500, 7)
Index([u'id', u'cookTimeInSeconds', u'ingredientCount', u'numberOfServings',
       u'prepTimeInSeconds', u'totalTimeInSeconds'],
      dtype='object')






    Out[87]:






  
    
      
      id
      cookTimeInSeconds
      ingredientCount
      numberOfServings
      prepTimeInSeconds
      totalTimeInSeconds
    
  
  
    
      0
      Raw-Brownies-With-Frosting-1707657
      NaN
      10
      4
      900.0
      900
    
    
      1
      Easy-Cinnamon-Roll-Cake-1552842
      NaN
      9
      10
      NaN
      3000
    
    
      2
      Coconut-Ice-Cream-1711710
      NaN
      2
      4
      300.0
      300



In [88]:

    
print ddf3.shape
ddf3 = ddf3.drop('Unnamed: 0', 1)
print ddf3.columns
ddf3.head(3)









    



(500, 7)
Index([u'id', u'cookTimeInSeconds', u'ingredientCount', u'numberOfServings',
       u'prepTimeInSeconds', u'totalTimeInSeconds'],
      dtype='object')






    Out[88]:






  
    
      
      id
      cookTimeInSeconds
      ingredientCount
      numberOfServings
      prepTimeInSeconds
      totalTimeInSeconds
    
  
  
    
      0
      Snickers-Chocolate-Cake-Mix-Bars-1710321
      3300.0
      4
      4
      480.0
      3780
    
    
      1
      Mango-Coconut-Popsicles-1699009
      NaN
      5
      10
      NaN
      300
    
    
      2
      Man-Bars-1703874
      NaN
      7
      24
      NaN
      2400



In [89]:

    
#concatenate the detail tables. 
DS_details= pd.concat([ddf, ddf1, ddf2, ddf3])



In [90]:

    
#peek at dataframe
print DS_details.shape
DS_details.head(3)









    



(2000, 6)






    Out[90]:






  
    
      
      id
      cookTimeInSeconds
      ingredientCount
      numberOfServings
      prepTimeInSeconds
      totalTimeInSeconds
    
  
  
    
      0
      Easy_-No-Bake_-Mud-Pie-1708171
      11700.0
      5
      6.0
      900.0
      12600
    
    
      1
      Tita_s-Fruit-Dessert-_SundaySupper-1706994
      NaN
      6
      4.0
      900.0
      900
    
    
      2
      No-Bake-Oreo-Cheesecake-1710284
      NaN
      6
      4.0
      1800.0
      1800



In [91]:

    
for i in DS_details.duplicated('id'):
    if i == True:
        print i
DS_details = DS_details.drop_duplicates('id')
DS_details.shape









    



True






    Out[91]:





(1999, 6)

Ingredients



In [92]:

    
idf = pd.read_csv('DS_ingredients.csv')
idf1 = pd.read_csv('DS_ingredients_1.csv')
idf2 = pd.read_csv('DS_ingredients_2.csv')
idf3 = pd.read_csv('DS_ingredients_3.csv')



In [93]:

    
print idf.shape
idf = idf.drop('Unnamed: 0', 1)
print idf.columns
idf.head(3)









    



(500, 603)
Index([u'id', u'course', u'active dry yeast', u'agave nectar', u'alcohol',
       u'all-purpose flour', u'allspice', u'almond butter', u'almond extract',
       u'almond flour',
       ...
       u'xylitol sweetener', u'yellow cake mix', u'yoghurt', u'yogurt',
       u'yolk', u'yoplait', u'yoplait greek 100 blackberry pie yogurt',
       u'yoplait greek key lime pie yogurt',
       u'yoplait greek lemon meringue yogurt', u'zucchini'],
      dtype='object', length=602)






    Out[93]:






  
    
      
      id
      course
      active dry yeast
      agave nectar
      alcohol
      all-purpose flour
      allspice
      almond butter
      almond extract
      almond flour
      ...
      xylitol sweetener
      yellow cake mix
      yoghurt
      yogurt
      yolk
      yoplait
      yoplait greek 100 blackberry pie yogurt
      yoplait greek key lime pie yogurt
      yoplait greek lemon meringue yogurt
      zucchini
    
  
  
    
      0
      Easy_-No-Bake_-Mud-Pie-1708171
      Breakfast and Brunch
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      1
      Tita_s-Fruit-Dessert-_SundaySupper-1706994
      Breakfast and Brunch
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      2
      No-Bake-Oreo-Cheesecake-1710284
      Breakfast and Brunch
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
  

3 rows × 602 columns



In [94]:

    
print idf1.shape
idf1 = idf1.drop('Unnamed: 0', 1)
print idf1.columns
idf1.head(3)









    



(500, 609)
Index([u'id', u'course', u'agar', u'agave nectar', u'all-purpose flour',
       u'allspice', u'almond butter', u'almond extract', u'almond flour',
       u'almond meal',
       ...
       u'yellow cake mix', u'yellow cornmeal', u'yellow food coloring',
       u'yoghurt', u'yogurt', u'yolk', u'yoplait greek 100 apple pie yogurt',
       u'yoplait greek 100 mango yogurt',
       u'yoplait greek 100 raspberry yogurt', u'zest'],
      dtype='object', length=608)






    Out[94]:






  
    
      
      id
      course
      agar
      agave nectar
      all-purpose flour
      allspice
      almond butter
      almond extract
      almond flour
      almond meal
      ...
      yellow cake mix
      yellow cornmeal
      yellow food coloring
      yoghurt
      yogurt
      yolk
      yoplait greek 100 apple pie yogurt
      yoplait greek 100 mango yogurt
      yoplait greek 100 raspberry yogurt
      zest
    
  
  
    
      0
      Lemonade-Popsicles-1682120
      Breakfast and Brunch
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      1
      Best-Peanut-Butter-Buttercream-Frosting-1682977
      Breakfast and Brunch
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      2
      Omega-Chocolate-Bars-1695745
      Breakfast and Brunch
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
  

3 rows × 608 columns



In [95]:

    
print idf2.shape
idf2 = idf2.drop('Unnamed: 0', 1)
print idf2.columns
idf2.head(3)









    



(500, 629)
Index([u'id', u'course', u'9 inch chocolate crumb crust', u'agar',
       u'agave nectar', u'all-purpose flour', u'allspice', u'almond butter',
       u'almond extract', u'almond flour',
       ...
       u'whole wheat flour', u'whole wheat white flour', u'yellow cake mix',
       u'yellow food coloring', u'yoghurt', u'yogurt', u'yolk', u'yoplait',
       u'yoplait greek 100 pineapple yogurt', u'yoplait greek caramel yogurt'],
      dtype='object', length=628)






    Out[95]:






  
    
      
      id
      course
      9 inch chocolate crumb crust
      agar
      agave nectar
      all-purpose flour
      allspice
      almond butter
      almond extract
      almond flour
      ...
      whole wheat flour
      whole wheat white flour
      yellow cake mix
      yellow food coloring
      yoghurt
      yogurt
      yolk
      yoplait
      yoplait greek 100 pineapple yogurt
      yoplait greek caramel yogurt
    
  
  
    
      0
      Raw-Brownies-With-Frosting-1707657
      Breakfast and Brunch
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      1
      Easy-Cinnamon-Roll-Cake-1552842
      Breakfast and Brunch
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      2
      Coconut-Ice-Cream-1711710
      Breakfast and Brunch
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
  

3 rows × 628 columns



In [96]:

    
print idf3.shape
idf3 = idf3.drop('Unnamed: 0', 1)
print idf3.columns
idf3.head(3)









    



(500, 652)
Index([u'id', u'course', u'agave nectar', u'all-purpose flour', u'allspice',
       u'almond butter', u'almond extract', u'almond flour', u'almond meal',
       u'almond milk',
       ...
       u'whole wheat flour', u'whole wheat pastry flour',
       u'whole wheat white flour', u'xanthum gum', u'yellow cake mix',
       u'yellow food coloring', u'yoghurt', u'yolk', u'yoplait', u'zest'],
      dtype='object', length=651)






    Out[96]:






  
    
      
      id
      course
      agave nectar
      all-purpose flour
      allspice
      almond butter
      almond extract
      almond flour
      almond meal
      almond milk
      ...
      whole wheat flour
      whole wheat pastry flour
      whole wheat white flour
      xanthum gum
      yellow cake mix
      yellow food coloring
      yoghurt
      yolk
      yoplait
      zest
    
  
  
    
      0
      Snickers-Chocolate-Cake-Mix-Bars-1710321
      Breakfast and Brunch
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      1
      Mango-Coconut-Popsicles-1699009
      Breakfast and Brunch
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      1.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      2
      Man-Bars-1703874
      Breakfast and Brunch
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
  

3 rows × 651 columns



In [97]:

    
#concatenate the ingredients tables. 
DS_ing= pd.concat([idf, idf1, idf2, idf3])
#create a new dataframe with selected columns
DS_ing_reduced = DS_ing[['id', 'ingredient_list']]



In [98]:

    
DS_ing.head(3)









    Out[98]:






  
    
      
      9 inch chocolate crumb crust
      active dry yeast
      agar
      agave nectar
      alcohol
      all-purpose flour
      allspice
      almond butter
      almond extract
      almond flour
      ...
      yoplait greek 100 apple pie yogurt
      yoplait greek 100 blackberry pie yogurt
      yoplait greek 100 mango yogurt
      yoplait greek 100 pineapple yogurt
      yoplait greek 100 raspberry yogurt
      yoplait greek caramel yogurt
      yoplait greek key lime pie yogurt
      yoplait greek lemon meringue yogurt
      zest
      zucchini
    
  
  
    
      0
      NaN
      0.0
      NaN
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      NaN
      0.0
      NaN
      NaN
      NaN
      NaN
      0.0
      0.0
      NaN
      0.0
    
    
      1
      NaN
      0.0
      NaN
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      NaN
      0.0
      NaN
      NaN
      NaN
      NaN
      0.0
      0.0
      NaN
      0.0
    
    
      2
      NaN
      0.0
      NaN
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      NaN
      0.0
      NaN
      NaN
      NaN
      NaN
      0.0
      0.0
      NaN
      0.0
    
  

3 rows × 1174 columns



In [99]:

    
#make id first column
cols = list(DS_ing)
cols.insert(0, cols.pop(cols.index('id')))
DS_ing = DS_ing.ix[:, cols]



In [100]:

    
DS_ing.head(3)









    Out[100]:






  
    
      
      id
      9 inch chocolate crumb crust
      active dry yeast
      agar
      agave nectar
      alcohol
      all-purpose flour
      allspice
      almond butter
      almond extract
      ...
      yoplait greek 100 apple pie yogurt
      yoplait greek 100 blackberry pie yogurt
      yoplait greek 100 mango yogurt
      yoplait greek 100 pineapple yogurt
      yoplait greek 100 raspberry yogurt
      yoplait greek caramel yogurt
      yoplait greek key lime pie yogurt
      yoplait greek lemon meringue yogurt
      zest
      zucchini
    
  
  
    
      0
      Easy_-No-Bake_-Mud-Pie-1708171
      NaN
      0.0
      NaN
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      NaN
      0.0
      NaN
      NaN
      NaN
      NaN
      0.0
      0.0
      NaN
      0.0
    
    
      1
      Tita_s-Fruit-Dessert-_SundaySupper-1706994
      NaN
      0.0
      NaN
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      NaN
      0.0
      NaN
      NaN
      NaN
      NaN
      0.0
      0.0
      NaN
      0.0
    
    
      2
      No-Bake-Oreo-Cheesecake-1710284
      NaN
      0.0
      NaN
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      NaN
      0.0
      NaN
      NaN
      NaN
      NaN
      0.0
      0.0
      NaN
      0.0
    
  

3 rows × 1174 columns



In [101]:

    
for i in DS_ing.duplicated('id'):
    if i == True:
        print i
DS_ing = DS_ing.drop_duplicates('id')
DS_ing.shape









    



True






    Out[101]:





(1999, 1174)

Join all tables for Dessert



In [102]:

    
# set index to column 'id'
_df = [DS_main, DS_main_reduced, DS_cuisines, DS_flavors, DS_details, DS_ing, DS_ing_reduced]

for df in _df:
    df.set_index('id', inplace = True)



In [105]:

    
# join dataframes
DS_data = DS_main.join([DS_cuisines, DS_flavors, DS_details, DS_ing])
DS_data_reduced = DS_main_reduced.join([DS_flavors, DS_details, DS_ing_reduced])

# create a course column
DS_data['course'] = 'dessert'
DS_data_reduced ['course'] = 'dessert'



In [106]:

    
DS_data.shape









    Out[106]:





(1999, 1213)



In [107]:

    
# save into csv
DS_data.to_csv('DS_data.csv')
DS_data_reduced.to_csv('DS_data_reduced.csv')

	Unnamed: 0	id	rating	recipeName	sourceDisplayName
0	0	Easy_-No-Bake_-Mud-Pie-1708171	4	Easy, No Bake, Mud Pie	Ann's Entitled Life
1	1	Tita_s-Fruit-Dessert-_SundaySupper-1706994	4	Tita's Fruit Dessert #SundaySupper	Basic N Delicious
2	2	No-Bake-Oreo-Cheesecake-1710284	3	No Bake Oreo Cheesecake	Cook and Share

	id	rating	recipeName	sourceDisplayName
0	Lemonade-Popsicles-1682120	3	Lemonade Popsicles	Beachbody
1	Best-Peanut-Butter-Buttercream-Frosting-1682977	4	Best Peanut Butter Buttercream Frosting	Two Sisters Crafting
2	Omega-Chocolate-Bars-1695745	4	Omega Chocolate Bars	A Healthy Life For Me

	id	rating	recipeName	sourceDisplayName
0	Raw-Brownies-With-Frosting-1707657	4	Raw Brownies With Frosting	Planticize
1	Easy-Cinnamon-Roll-Cake-1552842	4	Easy Cinnamon Roll Cake	Cookies & Cups
2	Coconut-Ice-Cream-1711710	4	Coconut Ice Cream	Get Inspired Everyday!

	id	rating	recipeName	sourceDisplayName
0	Snickers-Chocolate-Cake-Mix-Bars-1710321	4	Snickers Chocolate Cake Mix Bars	Kleinworth Co.
1	Mango-Coconut-Popsicles-1699009	4	Mango Coconut Popsicles	Eat Good 4 Life
2	Man-Bars-1703874	4	Man Bars	South your mouth

	id	bitter	meaty	piquant	salty	sour	sweet
0	-No-Bake-Caramel-Cheesecake-Shooters-1706735	NaN	NaN	NaN	NaN	NaN	NaN
1	1-2-3-4-Cupcakes-760182	0.166667	0.333333	0.0	0.333333	0.166667	0.833333
2	1-Minute-Chocolate-Frosting-1690971	NaN	NaN	NaN	NaN	NaN	NaN

	id	bitter	meaty	piquant	salty	sour	sweet
0	10-Layer-Chocolate-Caramel-Mousse-Cake-1697890	0.166667	0.166667	0.0	0.5	0.166667	0.833333
1	2-Bite-Cherry-Pies-1709499	NaN	NaN	NaN	NaN	NaN	NaN
2	3-Ingredient-Brownies-1703671	0.166667	0.166667	0.0	0.0	0.166667	0.833333

	id	bitter	meaty	piquant	salty	sour	sweet
0	1-Minute-Sugar-Free-Peanut-Butter-Mug-Cake-169...	0.833333	0.833333	0.0	0.833333	0.166667	0.333333
1	10-Minute-Strawberry-Jam-1702695	0.000000	0.166667	0.0	0.000000	0.833333	0.666667
2	1_2-Syn-Strawberry-Mousse-1703461	NaN	NaN	NaN	NaN	NaN	NaN

	id	bitter	meaty	piquant	salty	sour	sweet
0	100-Calorie-Chocolate-Peanut-Butter-Squares-15...	NaN	NaN	NaN	NaN	NaN	NaN
1	15-Minute-Strawberry-Jam-1683439	0.0	0.166667	0.0	0.0	0.833333	0.666667
2	1_-Classic-Chocolate-Cake-1708247	NaN	NaN	NaN	NaN	NaN	NaN

	id	cookTimeInSeconds	ingredientCount	numberOfServings	prepTimeInSeconds	totalTimeInSeconds
0	Easy_-No-Bake_-Mud-Pie-1708171	11700.0	5	6	900.0	12600
1	Tita_s-Fruit-Dessert-_SundaySupper-1706994	NaN	6	4	900.0	900
2	No-Bake-Oreo-Cheesecake-1710284	NaN	6	4	1800.0	1800

	id	cookTimeInSeconds	ingredientCount	numberOfServings	prepTimeInSeconds	totalTimeInSeconds
0	Lemonade-Popsicles-1682120	NaN	3	4.0	600.0	15000
1	Best-Peanut-Butter-Buttercream-Frosting-1682977	NaN	5	4.0	NaN	1200
2	Omega-Chocolate-Bars-1695745	120.0	5	25.0	1080.0	1200

	id	cookTimeInSeconds	ingredientCount	numberOfServings	prepTimeInSeconds	totalTimeInSeconds
0	Snickers-Chocolate-Cake-Mix-Bars-1710321	3300.0	4	4	480.0	3780
1	Mango-Coconut-Popsicles-1699009	NaN	5	10	NaN	300
2	Man-Bars-1703874	NaN	7	24	NaN	2400

	id	course	...
0	Easy_-No-Bake_-Mud-Pie-1708171	Breakfast and Brunch	...
1	Tita_s-Fruit-Dessert-_SundaySupper-1706994	Breakfast and Brunch	...
2	No-Bake-Oreo-Cheesecake-1710284	Breakfast and Brunch	...

	9 inch chocolate crumb crust	agar	...	yoplait greek 100 apple pie yogurt	yoplait greek 100 mango yogurt	yoplait greek 100 pineapple yogurt	yoplait greek 100 raspberry yogurt	yoplait greek caramel yogurt	zest
0	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN
1	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN
2	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN

	9 inch chocolate crumb crust	agar	...	yoplait greek 100 apple pie yogurt	yoplait greek 100 mango yogurt	yoplait greek 100 pineapple yogurt	yoplait greek 100 raspberry yogurt	yoplait greek caramel yogurt	zest
0	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN
1	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN
2	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN