In [1]:

    
import pandas as pd
import numpy as np

Salads

Main



In [2]:

    
# Load the four "main" recipe chunks (id, rating, name, source) in file order.
df, df1, df2, df3 = map(pd.read_csv, [
    'SLD_main.csv',
    'SLD_main_1.csv',
    'SLD_main_2.csv',
    'SLD_main_3.csv',
])



In [3]:

    
# Show the size of the first "main" chunk, then preview its first rows.
# print() with one argument behaves the same on Python 2 and Python 3.
print(df.shape)
df.head(3)









    



(500, 5)






    Out[3]:






  
    
      
      Unnamed: 0
      id
      rating
      recipeName
      sourceDisplayName
    
  
  
    
      0
      0
      Greek-Pasta-Salad-1712241
      4
      Greek Pasta Salad
      Live Serendipity
    
    
      1
      1
      Caprese-Salad-Recipe-With-Tomatoes_-Basil-And-...
      4
      Caprese Salad Recipe With Tomatoes, Basil And ...
      Melanie Cooks
    
    
      2
      2
      Grandmas-Cucumber-Salad-1710308
      4
      Grandma’s Cucumber Salad
      Chin Deep



In [4]:

    
# Drop the 'Unnamed: 0' counter column; axis is passed by keyword because
# newer pandas no longer accepts it positionally.
df = df.drop('Unnamed: 0', axis=1)



In [5]:

    
# Report the raw chunk size, drop the 'Unnamed: 0' column, then preview.
# axis is passed by keyword because newer pandas rejects it positionally.
print(df1.shape)
df1 = df1.drop('Unnamed: 0', axis=1)
df1.head(3)









    



(501, 5)






    Out[5]:






  
    
      
      id
      rating
      recipeName
      sourceDisplayName
    
  
  
    
      0
      Apple-Coleslaw-1680966
      4
      Apple Coleslaw
      FoodForYourGood.com
    
    
      1
      Watermelon-Salad-with-Mint-and-Feta-1668127
      4
      Watermelon Salad with Mint and Feta
      The Organic Kitchen
    
    
      2
      Sweet-Kale-Salad-1693185
      4
      Sweet Kale Salad
      ifoodreal



In [6]:

    
# Report the raw chunk size, drop the 'Unnamed: 0' column, then preview.
# axis is passed by keyword because newer pandas rejects it positionally.
print(df2.shape)
df2 = df2.drop('Unnamed: 0', axis=1)
df2.head(3)









    



(500, 5)






    Out[6]:






  
    
      
      id
      rating
      recipeName
      sourceDisplayName
    
  
  
    
      0
      Lemony-Coleslaw-with-Apples-1700377
      3
      Lemony Coleslaw with Apples
      Weight Watchers
    
    
      1
      Thai-Curry-Pasta-Salad-_Vegan_-Gluten-Free_-17...
      3
      Thai Curry Pasta Salad [Vegan, Gluten-Free]
      One Green Planet
    
    
      2
      Paleo-Ranch-Dressing-1620911
      4
      Paleo Ranch Dressing
      Follow the Ruels



In [7]:

    
# Report the raw chunk size, drop the 'Unnamed: 0' column, then preview.
# axis is passed by keyword because newer pandas rejects it positionally.
print(df3.shape)
df3 = df3.drop('Unnamed: 0', axis=1)
df3.head(3)









    



(500, 5)






    Out[7]:






  
    
      
      id
      rating
      recipeName
      sourceDisplayName
    
  
  
    
      0
      Cucumber-Chickpea-Salad-with-Balsamic-Tahini-D...
      4
      Cucumber Chickpea Salad with Balsamic Tahini D...
      Veggie Inspired
    
    
      1
      Cucumber-Tomato-Salad-1102770
      4
      Cucumber Tomato Salad
      Barefeet In The Kitchen
    
    
      2
      Quinoa-Tabbouleh-Salad-The-Shiksa-Blog-48849
      5
      Quinoa Tabbouleh Salad
      Tori Avey



In [8]:

    
# Concatenate the main tables row-wise. Each chunk keeps its own row labels,
# so index values repeat across chunks.
SLD_main = pd.concat([df, df1, df2, df3])
# Create the reduced table (id + rating only).
# It is derived before SLD_main itself is de-duplicated in a later cell, so
# the repeated recipe id must be removed here as well; otherwise the duplicate
# row survives into the reduced join.
SLD_main_reduced = (
    SLD_main
    .drop_duplicates('id')
    .drop(['recipeName', 'sourceDisplayName'], axis=1)
)



In [9]:

    
# Peek at the combined main table: overall size, then the first rows.
print(SLD_main.shape)
SLD_main.head(3)









    



(2001, 4)






    Out[9]:






  
    
      
      id
      rating
      recipeName
      sourceDisplayName
    
  
  
    
      0
      Greek-Pasta-Salad-1712241
      4
      Greek Pasta Salad
      Live Serendipity
    
    
      1
      Caprese-Salad-Recipe-With-Tomatoes_-Basil-And-...
      4
      Caprese Salad Recipe With Tomatoes, Basil And ...
      Melanie Cooks
    
    
      2
      Grandmas-Cucumber-Salad-1710308
      4
      Grandma’s Cucumber Salad
      Chin Deep



In [10]:

    
# Print one "True" per row whose id repeats an earlier row, then keep only the
# first occurrence of each id and show the resulting size.
for i in SLD_main.duplicated('id'):
    if i:
        print(i)
SLD_main = SLD_main.drop_duplicates('id')
SLD_main.shape









    



True






    Out[10]:





(2000, 4)

Flavors



In [11]:

    
# Load the four flavor-profile chunks in file order.
fdf, fdf1, fdf2, fdf3 = map(pd.read_csv, [
    'SLD_flavors.csv',
    'SLD_flavors_1.csv',
    'SLD_flavors_2.csv',
    'SLD_flavors_3.csv',
])



In [12]:

    
# Report the raw size, drop 'Unnamed: 0', and rename the recipe key column
# from 'index' to 'id' so it matches the other tables; then preview.
print(fdf.shape)
fdf = fdf.drop('Unnamed: 0', axis=1)
fdf = fdf.rename(columns={'index': 'id'})
fdf.head(3)









    



(500, 8)






    Out[12]:






  
    
      
      id
      bitter
      meaty
      piquant
      salty
      sour
      sweet
    
  
  
    
      0
      10-Minute-Thai-Shrimp_-Cucumber-_-Avocado-Sala...
      0.500000
      0.166667
      0.166667
      0.833333
      0.166667
      0.166667
    
    
      1
      3-Ingredient-Cucumber-Dill-Ribbon-Salad-1624433
      0.166667
      0.166667
      0.000000
      0.166667
      0.166667
      0.166667
    
    
      2
      5-Ingredient-Corn-Salad-1695247
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN



In [13]:

    
# Report the raw size, drop 'Unnamed: 0', and rename the recipe key column
# from 'index' to 'id' so it matches the other tables; then preview.
print(fdf1.shape)
fdf1 = fdf1.drop('Unnamed: 0', axis=1)
fdf1 = fdf1.rename(columns={'index': 'id'})
fdf1.head(3)









    



(501, 8)






    Out[13]:






  
    
      
      id
      bitter
      meaty
      piquant
      salty
      sour
      sweet
    
  
  
    
      0
      1-Minute-Lentil-Salad-1639395
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      1
      10-Minute-Tomato-and-Walnut-Salad-1681561
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      2
      4-Ingredient-Chickpea-Salad-1708777
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN



In [14]:

    
# Report the raw size, drop 'Unnamed: 0', and rename the recipe key column
# from 'index' to 'id' so it matches the other tables; then preview.
print(fdf2.shape)
fdf2 = fdf2.drop('Unnamed: 0', axis=1)
fdf2 = fdf2.rename(columns={'index': 'id'})
fdf2.head(3)









    



(500, 8)






    Out[14]:






  
    
      
      id
      bitter
      meaty
      piquant
      salty
      sour
      sweet
    
  
  
    
      0
      3-Healthy-Salad-Dressings_-1707545
      0.166667
      0.833333
      0.166667
      0.500000
      0.500000
      0.166667
    
    
      1
      4-Ingredient-Frito-_Salad_-1641651
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
    
    
      2
      4-Ingredient-Tangy-Potato-Salad-1641682
      0.166667
      0.166667
      0.166667
      0.166667
      0.833333
      0.166667



In [15]:

    
# Report the raw size, drop 'Unnamed: 0', and rename the recipe key column
# from 'index' to 'id' so it matches the other tables; then preview.
print(fdf3.shape)
fdf3 = fdf3.drop('Unnamed: 0', axis=1)
fdf3 = fdf3.rename(columns={'index': 'id'})
fdf3.head(3)









    



(500, 8)






    Out[15]:






  
    
      
      id
      bitter
      meaty
      piquant
      salty
      sour
      sweet
    
  
  
    
      0
      15-Minute-Greek-Cucumber-Salad-1119811
      0.166667
      0.166667
      0.333333
      0.500000
      0.333333
      0.166667
    
    
      1
      5-Minute-Apple-Cranberry-Salad-1336215
      0.166667
      0.166667
      0.000000
      0.166667
      0.166667
      0.500000
    
    
      2
      A-Sweet-And-Sour-Side-Salad-1627813
      0.166667
      0.166667
      0.000000
      0.000000
      0.166667
      0.166667



In [16]:

    
# Stack the four flavor chunks row-wise into one table (chunk row labels are kept).
SLD_flavors = pd.concat([fdf, fdf1, fdf2, fdf3], axis=0)



In [17]:

    
# Peek at the combined flavors table: overall size, then the first rows.
print(SLD_flavors.shape)
SLD_flavors.head(3)









    



(2001, 7)






    Out[17]:






  
    
      
      id
      bitter
      meaty
      piquant
      salty
      sour
      sweet
    
  
  
    
      0
      10-Minute-Thai-Shrimp_-Cucumber-_-Avocado-Sala...
      0.500000
      0.166667
      0.166667
      0.833333
      0.166667
      0.166667
    
    
      1
      3-Ingredient-Cucumber-Dill-Ribbon-Salad-1624433
      0.166667
      0.166667
      0.000000
      0.166667
      0.166667
      0.166667
    
    
      2
      5-Ingredient-Corn-Salad-1695247
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN



In [18]:

    
# Print one "True" per row whose id repeats an earlier row, then keep only the
# first occurrence of each id and show the resulting size.
for i in SLD_flavors.duplicated('id'):
    if i:
        print(i)
SLD_flavors = SLD_flavors.drop_duplicates('id')
SLD_flavors.shape









    



True






    Out[18]:





(2000, 7)

Cuisine



In [19]:

    
# Load the four cuisine-indicator chunks in file order.
cdf, cdf1, cdf2, cdf3 = map(pd.read_csv, [
    'SLD_cuisines.csv',
    'SLD_cuisines_1.csv',
    'SLD_cuisines_2.csv',
    'SLD_cuisines_3.csv',
])



In [20]:

    
# Report the raw size, drop 'Unnamed: 0', rename the recipe key column from
# 'index' to 'id', then list the cuisine columns and preview the rows.
print(cdf.shape)
cdf = cdf.drop('Unnamed: 0', axis=1)
cdf = cdf.rename(columns={'index': 'id'})
print(cdf.columns)
cdf.head(3)









    



(500, 28)
Index([u'id', u'American', u'Asian', u'Barbecue', u'Cajun & Creole',
       u'Chinese', u'Cuban', u'English', u'French', u'German', u'Greek',
       u'Hawaiian', u'Hungarian', u'Indian', u'Irish', u'Italian', u'Japanese',
       u'Kid-Friendly', u'Mediterranean', u'Mexican', u'Moroccan',
       u'Portuguese', u'Southern & Soul Food', u'Southwestern', u'Spanish',
       u'Swedish', u'Thai'],
      dtype='object')






    Out[20]:






  
    
      
      id
      American
      Asian
      Barbecue
      Cajun & Creole
      Chinese
      Cuban
      English
      French
      German
      ...
      Kid-Friendly
      Mediterranean
      Mexican
      Moroccan
      Portuguese
      Southern & Soul Food
      Southwestern
      Spanish
      Swedish
      Thai
    
  
  
    
      0
      10-Minute-Thai-Shrimp_-Cucumber-_-Avocado-Sala...
      0
      1
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      1
      3-Ingredient-Cucumber-Dill-Ribbon-Salad-1624433
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      2
      5-Ingredient-Corn-Salad-1695247
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
  

3 rows × 27 columns



In [21]:

    
# Report the raw size, drop 'Unnamed: 0', rename the recipe key column from
# 'index' to 'id', then list the cuisine columns and preview the rows.
print(cdf1.shape)
cdf1 = cdf1.drop('Unnamed: 0', axis=1)
cdf1 = cdf1.rename(columns={'index': 'id'})
print(cdf1.columns)
cdf1.head(3)









    



(501, 28)
Index([u'id', u'American', u'Asian', u'Barbecue', u'Cajun & Creole',
       u'Chinese', u'Cuban', u'English', u'French', u'German', u'Greek',
       u'Hawaiian', u'Hungarian', u'Indian', u'Irish', u'Italian', u'Japanese',
       u'Kid-Friendly', u'Mediterranean', u'Mexican', u'Moroccan',
       u'Portuguese', u'Southern & Soul Food', u'Southwestern', u'Spanish',
       u'Swedish', u'Thai'],
      dtype='object')






    Out[21]:






  
    
      
      id
      American
      Asian
      Barbecue
      Cajun & Creole
      Chinese
      Cuban
      English
      French
      German
      ...
      Kid-Friendly
      Mediterranean
      Mexican
      Moroccan
      Portuguese
      Southern & Soul Food
      Southwestern
      Spanish
      Swedish
      Thai
    
  
  
    
      0
      1-Minute-Lentil-Salad-1639395
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      1
      10-Minute-Tomato-and-Walnut-Salad-1681561
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      2
      4-Ingredient-Chickpea-Salad-1708777
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
  

3 rows × 27 columns



In [22]:

    
# Report the raw size, drop 'Unnamed: 0', rename the recipe key column from
# 'index' to 'id', then list the cuisine columns and preview the rows.
print(cdf2.shape)
cdf2 = cdf2.drop('Unnamed: 0', axis=1)
cdf2 = cdf2.rename(columns={'index': 'id'})
print(cdf2.columns)
cdf2.head(3)









    



(500, 28)
Index([u'id', u'American', u'Asian', u'Barbecue', u'Cajun & Creole',
       u'Chinese', u'Cuban', u'English', u'French', u'German', u'Greek',
       u'Hawaiian', u'Hungarian', u'Indian', u'Irish', u'Italian', u'Japanese',
       u'Kid-Friendly', u'Mediterranean', u'Mexican', u'Moroccan',
       u'Portuguese', u'Southern & Soul Food', u'Southwestern', u'Spanish',
       u'Swedish', u'Thai'],
      dtype='object')






    Out[22]:






  
    
      
      id
      American
      Asian
      Barbecue
      Cajun & Creole
      Chinese
      Cuban
      English
      French
      German
      ...
      Kid-Friendly
      Mediterranean
      Mexican
      Moroccan
      Portuguese
      Southern & Soul Food
      Southwestern
      Spanish
      Swedish
      Thai
    
  
  
    
      0
      3-Healthy-Salad-Dressings_-1707545
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      1
      4-Ingredient-Frito-_Salad_-1641651
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      2
      4-Ingredient-Tangy-Potato-Salad-1641682
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
  

3 rows × 27 columns



In [23]:

    
# Report the raw size, drop 'Unnamed: 0', rename the recipe key column from
# 'index' to 'id', then list the cuisine columns and preview the rows.
print(cdf3.shape)
cdf3 = cdf3.drop('Unnamed: 0', axis=1)
cdf3 = cdf3.rename(columns={'index': 'id'})
print(cdf3.columns)
cdf3.head(3)









    



(500, 28)
Index([u'id', u'American', u'Asian', u'Barbecue', u'Cajun & Creole',
       u'Chinese', u'Cuban', u'English', u'French', u'German', u'Greek',
       u'Hawaiian', u'Hungarian', u'Indian', u'Irish', u'Italian', u'Japanese',
       u'Kid-Friendly', u'Mediterranean', u'Mexican', u'Moroccan',
       u'Portuguese', u'Southern & Soul Food', u'Southwestern', u'Spanish',
       u'Swedish', u'Thai'],
      dtype='object')






    Out[23]:






  
    
      
      id
      American
      Asian
      Barbecue
      Cajun & Creole
      Chinese
      Cuban
      English
      French
      German
      ...
      Kid-Friendly
      Mediterranean
      Mexican
      Moroccan
      Portuguese
      Southern & Soul Food
      Southwestern
      Spanish
      Swedish
      Thai
    
  
  
    
      0
      15-Minute-Greek-Cucumber-Salad-1119811
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      1
      5-Minute-Apple-Cranberry-Salad-1336215
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      2
      A-Sweet-And-Sour-Side-Salad-1627813
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
  

3 rows × 27 columns



In [24]:

    
# Stack the four cuisine chunks row-wise into one table (chunk row labels are kept).
SLD_cuisines = pd.concat([cdf, cdf1, cdf2, cdf3], axis=0)



In [25]:

    
# Peek at the combined cuisines table: overall size, then the first rows.
print(SLD_cuisines.shape)
SLD_cuisines.head(3)









    



(2001, 27)






    Out[25]:






  
    
      
      id
      American
      Asian
      Barbecue
      Cajun & Creole
      Chinese
      Cuban
      English
      French
      German
      ...
      Kid-Friendly
      Mediterranean
      Mexican
      Moroccan
      Portuguese
      Southern & Soul Food
      Southwestern
      Spanish
      Swedish
      Thai
    
  
  
    
      0
      10-Minute-Thai-Shrimp_-Cucumber-_-Avocado-Sala...
      0
      1
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      1
      3-Ingredient-Cucumber-Dill-Ribbon-Salad-1624433
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
    
      2
      5-Ingredient-Corn-Salad-1695247
      0
      0
      0
      0
      0
      0
      0
      0
      0
      ...
      0
      0
      0
      0
      0
      0
      0
      0
      0
      0
    
  

3 rows × 27 columns



In [26]:

    
# Print one "True" per row whose id repeats an earlier row, then keep only the
# first occurrence of each id and show the resulting size.
for i in SLD_cuisines.duplicated('id'):
    if i:
        print(i)
SLD_cuisines = SLD_cuisines.drop_duplicates('id')
SLD_cuisines.shape









    



True






    Out[26]:





(2000, 27)

Details



In [27]:

    
# Load the four recipe-detail chunks (times, servings, ingredient count) in file order.
ddf, ddf1, ddf2, ddf3 = map(pd.read_csv, [
    'SLD_details.csv',
    'SLD_details_1.csv',
    'SLD_details_2.csv',
    'SLD_details_3.csv',
])



In [28]:

    
# Report the raw size, drop 'Unnamed: 0', then list the remaining columns and
# preview the rows. axis is passed by keyword for newer pandas.
print(ddf.shape)
ddf = ddf.drop('Unnamed: 0', axis=1)
print(ddf.columns)
ddf.head(3)









    



(500, 7)
Index([u'id', u'cookTimeInSeconds', u'ingredientCount', u'numberOfServings',
       u'prepTimeInSeconds', u'totalTimeInSeconds'],
      dtype='object')






    Out[28]:






  
    
      
      id
      cookTimeInSeconds
      ingredientCount
      numberOfServings
      prepTimeInSeconds
      totalTimeInSeconds
    
  
  
    
      0
      Greek-Pasta-Salad-1712241
      NaN
      7
      4
      NaN
      2400
    
    
      1
      Caprese-Salad-Recipe-With-Tomatoes_-Basil-And-...
      NaN
      7
      4
      NaN
      900
    
    
      2
      Grandmas-Cucumber-Salad-1710308
      NaN
      9
      8
      NaN
      1500



In [29]:

    
# Report the raw size, drop 'Unnamed: 0', then list the remaining columns and
# preview the rows. axis is passed by keyword for newer pandas.
print(ddf1.shape)
ddf1 = ddf1.drop('Unnamed: 0', axis=1)
print(ddf1.columns)
ddf1.head(3)









    



(501, 7)
Index([u'id', u'cookTimeInSeconds', u'ingredientCount', u'numberOfServings',
       u'prepTimeInSeconds', u'totalTimeInSeconds'],
      dtype='object')






    Out[29]:






  
    
      
      id
      cookTimeInSeconds
      ingredientCount
      numberOfServings
      prepTimeInSeconds
      totalTimeInSeconds
    
  
  
    
      0
      Apple-Coleslaw-1680966
      NaN
      8
      4
      NaN
      1200
    
    
      1
      Watermelon-Salad-with-Mint-and-Feta-1668127
      NaN
      5
      4
      600.0
      600
    
    
      2
      Sweet-Kale-Salad-1693185
      NaN
      15
      4
      NaN
      1200



In [30]:

    
# Report the raw size, drop 'Unnamed: 0', then list the remaining columns and
# preview the rows. axis is passed by keyword for newer pandas.
print(ddf2.shape)
ddf2 = ddf2.drop('Unnamed: 0', axis=1)
print(ddf2.columns)
ddf2.head(3)









    



(500, 7)
Index([u'id', u'cookTimeInSeconds', u'ingredientCount', u'numberOfServings',
       u'prepTimeInSeconds', u'totalTimeInSeconds'],
      dtype='object')






    Out[30]:






  
    
      
      id
      cookTimeInSeconds
      ingredientCount
      numberOfServings
      prepTimeInSeconds
      totalTimeInSeconds
    
  
  
    
      0
      Lemony-Coleslaw-with-Apples-1700377
      NaN
      11
      12
      900.0
      900
    
    
      1
      Thai-Curry-Pasta-Salad-_Vegan_-Gluten-Free_-17...
      NaN
      21
      1
      NaN
      1800
    
    
      2
      Paleo-Ranch-Dressing-1620911
      NaN
      12
      4
      NaN
      600



In [31]:

    
# Report the raw size, drop 'Unnamed: 0', then list the remaining columns and
# preview the rows. axis is passed by keyword for newer pandas.
print(ddf3.shape)
ddf3 = ddf3.drop('Unnamed: 0', axis=1)
print(ddf3.columns)
ddf3.head(3)









    



(500, 7)
Index([u'id', u'cookTimeInSeconds', u'ingredientCount', u'numberOfServings',
       u'prepTimeInSeconds', u'totalTimeInSeconds'],
      dtype='object')






    Out[31]:






  
    
      
      id
      cookTimeInSeconds
      ingredientCount
      numberOfServings
      prepTimeInSeconds
      totalTimeInSeconds
    
  
  
    
      0
      Cucumber-Chickpea-Salad-with-Balsamic-Tahini-D...
      NaN
      12
      4
      NaN
      1200.0
    
    
      1
      Cucumber-Tomato-Salad-1102770
      NaN
      8
      7
      NaN
      900.0
    
    
      2
      Quinoa-Tabbouleh-Salad-The-Shiksa-Blog-48849
      NaN
      8
      7
      NaN
      2400.0



In [32]:

    
# Stack the four detail chunks row-wise into one table (chunk row labels are kept).
SLD_details = pd.concat([ddf, ddf1, ddf2, ddf3], axis=0)



In [33]:

    
# Peek at the combined details table: overall size, then the first rows.
print(SLD_details.shape)
SLD_details.head(3)









    



(2001, 6)






    Out[33]:






  
    
      
      id
      cookTimeInSeconds
      ingredientCount
      numberOfServings
      prepTimeInSeconds
      totalTimeInSeconds
    
  
  
    
      0
      Greek-Pasta-Salad-1712241
      NaN
      7
      4
      NaN
      2400.0
    
    
      1
      Caprese-Salad-Recipe-With-Tomatoes_-Basil-And-...
      NaN
      7
      4
      NaN
      900.0
    
    
      2
      Grandmas-Cucumber-Salad-1710308
      NaN
      9
      8
      NaN
      1500.0



In [34]:

    
# Print one "True" per row whose id repeats an earlier row, then keep only the
# first occurrence of each id and show the resulting size.
for i in SLD_details.duplicated('id'):
    if i:
        print(i)
SLD_details = SLD_details.drop_duplicates('id')
SLD_details.shape









    



True






    Out[34]:





(2000, 6)

Ingredients



In [35]:

    
# Load the four ingredient-indicator chunks in file order.
idf, idf1, idf2, idf3 = map(pd.read_csv, [
    'SLD_ingredients.csv',
    'SLD_ingredients_1.csv',
    'SLD_ingredients_2.csv',
    'SLD_ingredients_3.csv',
])



In [36]:

    
# Report the raw size, drop 'Unnamed: 0', then list the remaining columns and
# preview the rows. axis is passed by keyword for newer pandas.
print(idf.shape)
idf = idf.drop('Unnamed: 0', axis=1)
print(idf.columns)
idf.head(3)









    



(500, 733)
Index([u'id', u'course', u'agave nectar', u'almond butter', u'almonds',
       u'anchovy fillets', u'anchovy paste', u'apple butter',
       u'apple cider vinegar', u'apples',
       ...
       u'whole kernel corn', u'whole milk', u'whole wheat cheese tortellini',
       u'wine vinegar', u'worcestershire sauce', u'yellow bell pepper',
       u'yellow mustard', u'yoghurt', u'zucchini', u'zucchini noodles'],
      dtype='object', length=732)






    Out[36]:






  
    
      
      id
      course
      agave nectar
      almond butter
      almonds
      anchovy fillets
      anchovy paste
      apple butter
      apple cider vinegar
      apples
      ...
      whole kernel corn
      whole milk
      whole wheat cheese tortellini
      wine vinegar
      worcestershire sauce
      yellow bell pepper
      yellow mustard
      yoghurt
      zucchini
      zucchini noodles
    
  
  
    
      0
      Greek-Pasta-Salad-1712241
      Breakfast and Brunch
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      1
      Caprese-Salad-Recipe-With-Tomatoes_-Basil-And-...
      Breakfast and Brunch
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      2
      Grandmas-Cucumber-Salad-1710308
      Breakfast and Brunch
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      1.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
  

3 rows × 732 columns



In [37]:

    
# Report the raw size, drop 'Unnamed: 0', then list the remaining columns and
# preview the rows. axis is passed by keyword for newer pandas.
print(idf1.shape)
idf1 = idf1.drop('Unnamed: 0', axis=1)
print(idf1.columns)
idf1.head(3)









    



(501, 752)
Index([u'id', u'course', u'agave nectar', u'aged balsamic vinegar',
       u'allspice', u'almond butter', u'almond oil', u'almonds',
       u'anchovy fillets', u'anchovy paste',
       ...
       u'yellow onion', u'yellow peppers', u'yellow squash', u'yellow tomato',
       u'yoghurt', u'yogurt', u'yukon gold', u'yukon gold potatoes', u'zest',
       u'zucchini'],
      dtype='object', length=751)






    Out[37]:






  
    
      
      id
      course
      agave nectar
      aged balsamic vinegar
      allspice
      almond butter
      almond oil
      almonds
      anchovy fillets
      anchovy paste
      ...
      yellow onion
      yellow peppers
      yellow squash
      yellow tomato
      yoghurt
      yogurt
      yukon gold
      yukon gold potatoes
      zest
      zucchini
    
  
  
    
      0
      Apple-Coleslaw-1680966
      Breakfast and Brunch
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      1
      Watermelon-Salad-with-Mint-and-Feta-1668127
      Breakfast and Brunch
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      2
      Sweet-Kale-Salad-1693185
      Breakfast and Brunch
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
  

3 rows × 751 columns



In [38]:

    
# Report the raw size, drop 'Unnamed: 0', then list the remaining columns and
# preview the rows. axis is passed by keyword for newer pandas.
print(idf2.shape)
idf2 = idf2.drop('Unnamed: 0', axis=1)
print(idf2.columns)
idf2.head(3)









    



(500, 766)
Index([u'id', u'course', u'agave nectar', u'alfalfa sprouts',
       u'all purpose potatoes', u'allspice', u'almond butter', u'almond milk',
       u'almonds', u'anchovy fillets',
       ...
       u'yellow mustard', u'yellow onion', u'yellow peppers',
       u'yellow summer squash', u'yoghurt', u'yukon gold potatoes', u'za'atar',
       u'zesty italian dressing', u'ziti', u'zucchini'],
      dtype='object', length=765)






    Out[38]:






  
    
      
      id
      course
      agave nectar
      alfalfa sprouts
      all purpose potatoes
      allspice
      almond butter
      almond milk
      almonds
      anchovy fillets
      ...
      yellow mustard
      yellow onion
      yellow peppers
      yellow summer squash
      yoghurt
      yukon gold potatoes
      za'atar
      zesty italian dressing
      ziti
      zucchini
    
  
  
    
      0
      Lemony-Coleslaw-with-Apples-1700377
      Breakfast and Brunch
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      1
      Thai-Curry-Pasta-Salad-_Vegan_-Gluten-Free_-17...
      Breakfast and Brunch
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      2
      Paleo-Ranch-Dressing-1620911
      Breakfast and Brunch
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
  

3 rows × 765 columns



In [39]:

    
# Report the raw size, drop 'Unnamed: 0', then list the remaining columns and
# preview the rows. axis is passed by keyword for newer pandas.
print(idf3.shape)
idf3 = idf3.drop('Unnamed: 0', axis=1)
print(idf3.columns)
idf3.head(3)









    



(500, 761)
Index([u'id', u'course', u'adobo sauce', u'agave nectar',
       u'aged balsamic vinegar', u'alessi sea salt', u'almond butter',
       u'almonds', u'anchovies', u'anchovy paste',
       ...
       u'yellow mustard', u'yellow onion', u'yellow peppers', u'yellow squash',
       u'yellow summer squash', u'yoghurt', u'yukon gold potatoes', u'za'atar',
       u'zest', u'zucchini'],
      dtype='object', length=760)






    Out[39]:






  
    
      
      id
      course
      adobo sauce
      agave nectar
      aged balsamic vinegar
      alessi sea salt
      almond butter
      almonds
      anchovies
      anchovy paste
      ...
      yellow mustard
      yellow onion
      yellow peppers
      yellow squash
      yellow summer squash
      yoghurt
      yukon gold potatoes
      za'atar
      zest
      zucchini
    
  
  
    
      0
      Cucumber-Chickpea-Salad-with-Balsamic-Tahini-D...
      Breakfast and Brunch
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      1
      Cucumber-Tomato-Salad-1102770
      Breakfast and Brunch
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
    
      2
      Quinoa-Tabbouleh-Salad-The-Shiksa-Blog-48849
      Breakfast and Brunch
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      ...
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
      0.0
    
  

3 rows × 760 columns



In [40]:

    
# Concatenate the ingredients tables row-wise.
# The chunks have different column sets (733/752/766/761 wide before cleanup),
# so the result holds the union of columns, with NaN wherever a chunk lacked a
# column.
# NOTE(review): those NaNs presumably mean "ingredient not used" (i.e. 0) -
# confirm before modelling on them.
SLD_ing = pd.concat([idf, idf1, idf2, idf3])
# Create the reduced table (id + ingredient list only).
# It is derived before SLD_ing is de-duplicated in a later cell, so the
# repeated recipe id is removed here too; otherwise it leaks into the reduced
# join.
# NOTE(review): 'ingredient_list' is not visible in the truncated column
# previews of the chunks - confirm every chunk provides it.
SLD_ing_reduced = SLD_ing.drop_duplicates('id')[['id', 'ingredient_list']]



In [41]:

    
# Preview the combined ingredients table (union of all chunk columns).
SLD_ing.head(3)









    Out[41]:






  
    
      
      adobo sauce
      agave nectar
      aged balsamic vinegar
      alessi sea salt
      alfalfa sprouts
      all purpose potatoes
      allspice
      almond butter
      almond milk
      almond oil
      ...
      yoghurt
      yogurt
      yukon gold
      yukon gold potatoes
      za'atar
      zest
      zesty italian dressing
      ziti
      zucchini
      zucchini noodles
    
  
  
    
      0
      NaN
      0.0
      NaN
      NaN
      NaN
      NaN
      NaN
      0.0
      NaN
      NaN
      ...
      0.0
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      0.0
      0.0
    
    
      1
      NaN
      0.0
      NaN
      NaN
      NaN
      NaN
      NaN
      0.0
      NaN
      NaN
      ...
      0.0
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      0.0
      0.0
    
    
      2
      NaN
      0.0
      NaN
      NaN
      NaN
      NaN
      NaN
      0.0
      NaN
      NaN
      ...
      0.0
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      0.0
      0.0
    
  

3 rows × 1397 columns



In [45]:

    
# Move 'id' to the front of the column order; after the concat it is no longer
# the leading column. No column is dropped here.
cols = list(SLD_ing)
cols.insert(0, cols.pop(cols.index('id')))
# Label-based column selection via .loc; the .ix indexer is deprecated and
# absent from newer pandas.
SLD_ing = SLD_ing.loc[:, cols]



In [46]:

    
# Preview again to check that 'id' is now the first column.
SLD_ing.head(3)









    Out[46]:






  
    
      
      id
      adobo sauce
      agave nectar
      aged balsamic vinegar
      alessi sea salt
      alfalfa sprouts
      all purpose potatoes
      allspice
      almond butter
      almond milk
      ...
      yoghurt
      yogurt
      yukon gold
      yukon gold potatoes
      za'atar
      zest
      zesty italian dressing
      ziti
      zucchini
      zucchini noodles
    
  
  
    
      0
      Greek-Pasta-Salad-1712241
      NaN
      0.0
      NaN
      NaN
      NaN
      NaN
      NaN
      0.0
      NaN
      ...
      0.0
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      0.0
      0.0
    
    
      1
      Caprese-Salad-Recipe-With-Tomatoes_-Basil-And-...
      NaN
      0.0
      NaN
      NaN
      NaN
      NaN
      NaN
      0.0
      NaN
      ...
      0.0
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      0.0
      0.0
    
    
      2
      Grandmas-Cucumber-Salad-1710308
      NaN
      0.0
      NaN
      NaN
      NaN
      NaN
      NaN
      0.0
      NaN
      ...
      0.0
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      0.0
      0.0
    
  

3 rows × 1397 columns



In [47]:

    
# Print one "True" per row whose id repeats an earlier row, then keep only the
# first occurrence of each id and show the resulting size.
for i in SLD_ing.duplicated('id'):
    if i:
        print(i)
SLD_ing = SLD_ing.drop_duplicates('id')
SLD_ing.shape









    Out[47]:





(2000, 1397)

Join all tables for Salads



In [49]:

    
# Set the index of every salad table to its 'id' column so the joins that
# follow align rows by recipe id.
_df = [SLD_main, SLD_main_reduced, SLD_cuisines, SLD_flavors, SLD_details, SLD_ing, SLD_ing_reduced]

# The loop variable is deliberately not named `df`, which would overwrite the
# notebook-level `df` holding the first main chunk.
for _frame in _df:
    # Frames already indexed by 'id' are skipped so re-running this cell is a
    # no-op rather than a KeyError; a frame without any 'id' still fails loudly.
    if _frame.index.name != 'id':
        _frame.set_index('id', inplace=True)



In [50]:

    
# join dataframes
# All frames were indexed by 'id' in the previous cell; DataFrame.join aligns
# on that index and, with its default how='left', keeps the rows of the
# left-hand frame.
SLD_data = SLD_main.join([SLD_cuisines, SLD_flavors, SLD_details, SLD_ing])
# Reduced variant: rating + flavors + details + ingredient list (no cuisines).
SLD_data_reduced = SLD_main_reduced.join([SLD_flavors, SLD_details, SLD_ing_reduced])
# create a course column
# Every row is labelled 'salad'.
# NOTE(review): the ingredient chunks already carry a 'course' column
# (previewed as 'Breakfast and Brunch'), so for SLD_data this presumably
# overwrites the joined column rather than adding a new one - confirm.
SLD_data['course'] = 'salad'
SLD_data_reduced['course'] = 'salad'



In [51]:

    
# Preview the fully joined salad table (indexed by recipe id).
SLD_data.head(3)









    Out[51]:






  
    
      
      rating
      recipeName
      sourceDisplayName
      American
      Asian
      Barbecue
      Cajun & Creole
      Chinese
      Cuban
      English
      ...
      yoghurt
      yogurt
      yukon gold
      yukon gold potatoes
      za'atar
      zest
      zesty italian dressing
      ziti
      zucchini
      zucchini noodles
    
    
      id
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
      
    
  
  
    
      Greek-Pasta-Salad-1712241
      4
      Greek Pasta Salad
      Live Serendipity
      0
      0
      0
      0
      0
      0
      0
      ...
      0.0
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      0.0
      0.0
    
    
      Caprese-Salad-Recipe-With-Tomatoes_-Basil-And-Mozarella-1710947
      4
      Caprese Salad Recipe With Tomatoes, Basil And ...
      Melanie Cooks
      0
      0
      0
      0
      0
      0
      0
      ...
      0.0
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      0.0
      0.0
    
    
      Grandmas-Cucumber-Salad-1710308
      4
      Grandma’s Cucumber Salad
      Chin Deep
      0
      0
      0
      0
      0
      0
      0
      ...
      0.0
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      NaN
      0.0
      0.0
    
  

3 rows × 1436 columns



In [52]:

    
# Write both joined salad tables to CSV files in the working directory
# (the 'id' index is written as the first column).
for _frame, _csv_name in [(SLD_data, 'SLD_data.csv'),
                          (SLD_data_reduced, 'SLD_data_reduced.csv')]:
    _frame.to_csv(_csv_name)

	Unnamed: 0	id	rating	recipeName	sourceDisplayName
0	0	Greek-Pasta-Salad-1712241	4	Greek Pasta Salad	Live Serendipity
1	1	Caprese-Salad-Recipe-With-Tomatoes_-Basil-And-...	4	Caprese Salad Recipe With Tomatoes, Basil And ...	Melanie Cooks
2	2	Grandmas-Cucumber-Salad-1710308	4	Grandma’s Cucumber Salad	Chin Deep

	id	rating	recipeName	sourceDisplayName
0	Apple-Coleslaw-1680966	4	Apple Coleslaw	FoodForYourGood.com
1	Watermelon-Salad-with-Mint-and-Feta-1668127	4	Watermelon Salad with Mint and Feta	The Organic Kitchen
2	Sweet-Kale-Salad-1693185	4	Sweet Kale Salad	ifoodreal

	id	rating	recipeName	sourceDisplayName
0	Lemony-Coleslaw-with-Apples-1700377	3	Lemony Coleslaw with Apples	Weight Watchers
1	Thai-Curry-Pasta-Salad-_Vegan_-Gluten-Free_-17...	3	Thai Curry Pasta Salad [Vegan, Gluten-Free]	One Green Planet
2	Paleo-Ranch-Dressing-1620911	4	Paleo Ranch Dressing	Follow the Ruels

	id	rating	recipeName	sourceDisplayName
0	Cucumber-Chickpea-Salad-with-Balsamic-Tahini-D...	4	Cucumber Chickpea Salad with Balsamic Tahini D...	Veggie Inspired
1	Cucumber-Tomato-Salad-1102770	4	Cucumber Tomato Salad	Barefeet In The Kitchen
2	Quinoa-Tabbouleh-Salad-The-Shiksa-Blog-48849	5	Quinoa Tabbouleh Salad	Tori Avey

	id	bitter	meaty	piquant	salty	sour	sweet
0	10-Minute-Thai-Shrimp_-Cucumber-_-Avocado-Sala...	0.500000	0.166667	0.166667	0.833333	0.166667	0.166667
1	3-Ingredient-Cucumber-Dill-Ribbon-Salad-1624433	0.166667	0.166667	0.000000	0.166667	0.166667	0.166667
2	5-Ingredient-Corn-Salad-1695247	NaN	NaN	NaN	NaN	NaN	NaN

	id	bitter	meaty	piquant	salty	sour	sweet
0	1-Minute-Lentil-Salad-1639395	NaN	NaN	NaN	NaN	NaN	NaN
1	10-Minute-Tomato-and-Walnut-Salad-1681561	NaN	NaN	NaN	NaN	NaN	NaN
2	4-Ingredient-Chickpea-Salad-1708777	NaN	NaN	NaN	NaN	NaN	NaN

	id	bitter	meaty	piquant	salty	sour	sweet
0	3-Healthy-Salad-Dressings_-1707545	0.166667	0.833333	0.166667	0.500000	0.500000	0.166667
1	4-Ingredient-Frito-_Salad_-1641651	NaN	NaN	NaN	NaN	NaN	NaN
2	4-Ingredient-Tangy-Potato-Salad-1641682	0.166667	0.166667	0.166667	0.166667	0.833333	0.166667

	id	bitter	meaty	piquant	salty	sour	sweet
0	15-Minute-Greek-Cucumber-Salad-1119811	0.166667	0.166667	0.333333	0.500000	0.333333	0.166667
1	5-Minute-Apple-Cranberry-Salad-1336215	0.166667	0.166667	0.000000	0.166667	0.166667	0.500000
2	A-Sweet-And-Sour-Side-Salad-1627813	0.166667	0.166667	0.000000	0.000000	0.166667	0.166667

	id	cookTimeInSeconds	ingredientCount	numberOfServings	prepTimeInSeconds	totalTimeInSeconds
0	Greek-Pasta-Salad-1712241	NaN	7	4	NaN	2400
1	Caprese-Salad-Recipe-With-Tomatoes_-Basil-And-...	NaN	7	4	NaN	900
2	Grandmas-Cucumber-Salad-1710308	NaN	9	8	NaN	1500

	id	cookTimeInSeconds	ingredientCount	numberOfServings	prepTimeInSeconds	totalTimeInSeconds
0	Apple-Coleslaw-1680966	NaN	8	4	NaN	1200
1	Watermelon-Salad-with-Mint-and-Feta-1668127	NaN	5	4	600.0	600
2	Sweet-Kale-Salad-1693185	NaN	15	4	NaN	1200

	id	cookTimeInSeconds	ingredientCount	numberOfServings	prepTimeInSeconds	totalTimeInSeconds
0	Lemony-Coleslaw-with-Apples-1700377	NaN	11	12	900.0	900
1	Thai-Curry-Pasta-Salad-_Vegan_-Gluten-Free_-17...	NaN	21	1	NaN	1800
2	Paleo-Ranch-Dressing-1620911	NaN	12	4	NaN	600

	id	cookTimeInSeconds	ingredientCount	numberOfServings	prepTimeInSeconds	totalTimeInSeconds
0	Cucumber-Chickpea-Salad-with-Balsamic-Tahini-D...	NaN	12	4	NaN	1200.0
1	Cucumber-Tomato-Salad-1102770	NaN	8	7	NaN	900.0
2	Quinoa-Tabbouleh-Salad-The-Shiksa-Blog-48849	NaN	8	7	NaN	2400.0

	id	course	...	whole milk
0	Greek-Pasta-Salad-1712241	Breakfast and Brunch	...	0.0
1	Caprese-Salad-Recipe-With-Tomatoes_-Basil-And-...	Breakfast and Brunch	...	0.0
2	Grandmas-Cucumber-Salad-1710308	Breakfast and Brunch	...	1.0

	adobo sauce	aged balsamic vinegar	alessi sea salt	alfalfa sprouts	all purpose potatoes	allspice	almond milk	almond oil	...	yogurt	yukon gold	yukon gold potatoes	za'atar	zest	zesty italian dressing	ziti
0	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN
1	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN
2	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN

	adobo sauce	aged balsamic vinegar	alessi sea salt	alfalfa sprouts	all purpose potatoes	allspice	almond milk	almond oil	...	yogurt	yukon gold	yukon gold potatoes	za'atar	zest	zesty italian dressing	ziti
0	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN
1	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN
2	NaN	NaN	NaN	NaN	NaN	NaN	NaN	NaN	...	NaN	NaN	NaN	NaN	NaN	NaN	NaN