In [1]:
import pandas as pd
import numpy as np

Appetizers

  • Main

In [2]:
df = pd.read_csv('APP_main.csv')
df1 = pd.read_csv('APP_main_1.csv')
df2 = pd.read_csv('APP_main_2.csv')
df3 = pd.read_csv('APP_main_3.csv')

In [3]:
print df1.shape
df3.head()


(500, 5)
Out[3]:
Unnamed: 0 id rating recipeName sourceDisplayName
0 0 Pizza-dip-333915 5 Pizza Dip How Sweet It Is
1 1 The-Greatest-Hummus-Recipe-Ever_-1663684 4 The Greatest Hummus Recipe Ever! Gluten Free Yummy
2 2 Garlic-Herb-Cream-Cheese-Dip-972926 4 Garlic Herb Cream Cheese Dip Cooking On A Budget
3 3 Frank_s-RedHot-Buffalo-Chicken-Dip-1089684 3 Frank's RedHot Buffalo Chicken Dip Frank's Red Hot
4 4 Marinated-Japanese-Mushrooms-1102960 4 Marinated Japanese Mushrooms No Recipes

In [4]:
df = df.drop('Unnamed: 0', 1)

In [5]:
print df1.shape
df1 = df1.drop('Unnamed: 0', 1)
df1.head(3)


(500, 5)
Out[5]:
id rating recipeName sourceDisplayName
0 Skinny-Hummus-With-Zero-Tahini-1627413 4 Skinny Hummus With Zero Tahini The Kitchen Girl
1 Pico-De-Gallo-1682898 4 Pico De Gallo The Girl Who Ate Everything
2 Fiesta-Baked-Cheese-Dip-1696540 3 Fiesta Baked Cheese Dip Kraft

In [6]:
print df2.shape
df2 = df2.drop('Unnamed: 0', 1)
df2.head(3)


(500, 5)
Out[6]:
id rating recipeName sourceDisplayName
0 Warm-Spinach-and-Artichoke-Dip-1222123 4 Warm Spinach and Artichoke Dip Deliciously Ella
1 Honey-Garlic-Crockpot-Meatballs-1245745 4 Honey Garlic Crockpot Meatballs Family Fresh Meals
2 Cheesy-Spinach-_-Artichoke-Dip-1063719 4 Cheesy Spinach & Artichoke Dip Shibley Smiles

In [7]:
print df3.shape
df3 = df3.drop('Unnamed: 0', 1)
df3.head(3)


(500, 5)
Out[7]:
id rating recipeName sourceDisplayName
0 Pizza-dip-333915 5 Pizza Dip How Sweet It Is
1 The-Greatest-Hummus-Recipe-Ever_-1663684 4 The Greatest Hummus Recipe Ever! Gluten Free Yummy
2 Garlic-Herb-Cream-Cheese-Dip-972926 4 Garlic Herb Cream Cheese Dip Cooking On A Budget

In [8]:
#concatenate the main tables. 
APP_main= pd.concat([df, df1, df2, df3])

In [9]:
#peek at dataframe
print APP_main.shape
APP_main.id.duplicated().sum()


(2000, 4)
Out[9]:
0

In [11]:
for i in APP_main.duplicated('id'):
    if i == True:
        print i
  • Flavors

In [12]:
fdf = pd.read_csv('APP_flavors.csv')
fdf1 = pd.read_csv('APP_flavors_1.csv')
fdf2 = pd.read_csv('APP_flavors_2.csv')
fdf3 = pd.read_csv('APP_flavors_3.csv')

In [13]:
print fdf.shape
fdf = fdf.drop('Unnamed: 0', 1)
fdf = fdf.rename(columns = {'index':'id'})
fdf.head(3)


(500, 8)
Out[13]:
id bitter meaty piquant salty sour sweet
0 3-Ingredient-Campfire-Dip-1708222 NaN NaN NaN NaN NaN NaN
1 5-Ingredient-Jalapeno-Cheddar-Dip-1713067 0.666667 0.833333 0.833333 0.500000 0.166667 0.166667
2 5-Ingredient-Salmon-Spread-with-Lemon_-Dill-an... 0.500000 0.500000 0.000000 0.666667 0.500000 0.333333

In [14]:
print fdf1.shape
fdf1 = fdf1.drop('Unnamed: 0', 1)
fdf1.head(3)


(500, 8)
Out[14]:
index bitter meaty piquant salty sour sweet
0 2-Ingredient-Queso-Dip-1077872 0.833333 0.166667 0.833333 0.833333 0.166667 0.333333
1 3-Ingredient-Candied-Kielbasa-Bites-1689830 0.833333 0.333333 0.166667 0.833333 0.833333 0.666667
2 4-Cheese-Hot-Roasted-Red-Pepper-Dip-439566 NaN NaN NaN NaN NaN NaN

In [15]:
#change column name index to id
fdf1=fdf1.rename(columns= {'index': 'id'})
fdf1.columns


Out[15]:
Index([u'id', u'bitter', u'meaty', u'piquant', u'salty', u'sour', u'sweet'], dtype='object')

In [16]:
print fdf2.shape
fdf2 = fdf2.drop('Unnamed: 0', 1)
fdf2 = fdf2.rename(columns = {'index':'id'})
fdf2.head(3)


(500, 8)
Out[16]:
id bitter meaty piquant salty sour sweet
0 2-Minute-Avocado-Dip-1626429 0.833333 0.166667 0.833333 0.833333 0.666667 0.166667
1 7-Layer-Flag-Dip-for-4th-of-July_-1636222 NaN NaN NaN NaN NaN NaN
2 Arugula-and-Chive-Baked-Egg-Cups-1632193 0.333333 0.833333 0.000000 0.833333 0.166667 0.166667

In [17]:
print fdf3.shape
fdf3 = fdf3.drop('Unnamed: 0', 1)
fdf3 = fdf3.rename(columns = {'index':'id'})
fdf3.head(3)


(500, 8)
Out[17]:
id bitter meaty piquant salty sour sweet
0 3-Ingredient-Chili-Cheese-Dip-1004579 0.833333 0.833333 0.833333 0.833333 0.166667 0.166667
1 4-Ingredient-Cheese-Ball-1599980 0.833333 0.833333 0.000000 0.833333 0.166667 0.166667
2 4-ingredient-Sweet-_-Spicy-Wings-1225519 0.166667 0.833333 0.666667 0.166667 0.166667 0.500000

In [18]:
#concatenate the flavor tables. 
APP_flavors= pd.concat([fdf, fdf1, fdf2, fdf3])

In [19]:
#peek at dataframe
print APP_flavors.shape
APP_flavors.head(3)


(2000, 7)
Out[19]:
id bitter meaty piquant salty sour sweet
0 3-Ingredient-Campfire-Dip-1708222 NaN NaN NaN NaN NaN NaN
1 5-Ingredient-Jalapeno-Cheddar-Dip-1713067 0.666667 0.833333 0.833333 0.500000 0.166667 0.166667
2 5-Ingredient-Salmon-Spread-with-Lemon_-Dill-an... 0.500000 0.500000 0.000000 0.666667 0.500000 0.333333

In [27]:
for i in APP_flavors.duplicated('id'):
    if i == True:
        print i
  • Cuisine

In [21]:
cdf = pd.read_csv('APP_cuisines.csv')
cdf1 = pd.read_csv('APP_cuisines_1.csv')
cdf2 = pd.read_csv('APP_cuisines_2.csv')
cdf3 = pd.read_csv('APP_cuisines_3.csv')

In [22]:
print cdf.shape
cdf = cdf.drop('Unnamed: 0', 1)
cdf = cdf.rename(columns = {'index':'id'})
print cdf.columns
cdf.head(3)


(500, 28)
Index([u'id', u'American', u'Asian', u'Barbecue', u'Cajun & Creole',
       u'Chinese', u'Cuban', u'English', u'French', u'German', u'Greek',
       u'Hawaiian', u'Hungarian', u'Indian', u'Irish', u'Italian', u'Japanese',
       u'Kid-Friendly', u'Mediterranean', u'Mexican', u'Moroccan',
       u'Portuguese', u'Southern & Soul Food', u'Southwestern', u'Spanish',
       u'Swedish', u'Thai'],
      dtype='object')
Out[22]:
id American Asian Barbecue Cajun & Creole Chinese Cuban English French German ... Kid-Friendly Mediterranean Mexican Moroccan Portuguese Southern & Soul Food Southwestern Spanish Swedish Thai
0 3-Ingredient-Campfire-Dip-1708222 0 0 1 0 0 0 0 0 0 ... 1 0 0 0 0 0 0 0 0 0
1 5-Ingredient-Jalapeno-Cheddar-Dip-1713067 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2 5-Ingredient-Salmon-Spread-with-Lemon_-Dill-an... 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0

3 rows × 27 columns


In [23]:
print cdf1.shape
cdf1 = cdf1.drop('Unnamed: 0', 1)
print cdf1.columns
cdf1.head(3)


(500, 28)
Index([u'index', u'American', u'Asian', u'Barbecue', u'Cajun & Creole',
       u'Chinese', u'Cuban', u'English', u'French', u'German', u'Greek',
       u'Hawaiian', u'Hungarian', u'Indian', u'Irish', u'Italian', u'Japanese',
       u'Kid-Friendly', u'Mediterranean', u'Mexican', u'Moroccan',
       u'Portuguese', u'Southern & Soul Food', u'Southwestern', u'Spanish',
       u'Swedish', u'Thai'],
      dtype='object')
Out[23]:
index American Asian Barbecue Cajun & Creole Chinese Cuban English French German ... Kid-Friendly Mediterranean Mexican Moroccan Portuguese Southern & Soul Food Southwestern Spanish Swedish Thai
0 2-Ingredient-Queso-Dip-1077872 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1 3-Ingredient-Candied-Kielbasa-Bites-1689830 0 0 0 0 0 0 0 0 0 ... 1 0 0 0 0 0 0 0 0 0
2 4-Cheese-Hot-Roasted-Red-Pepper-Dip-439566 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0

3 rows × 27 columns


In [24]:
#change column name index to id
cdf1=cdf1.rename(columns= {'index': 'id'})
cdf1.columns


Out[24]:
Index([u'id', u'American', u'Asian', u'Barbecue', u'Cajun & Creole',
       u'Chinese', u'Cuban', u'English', u'French', u'German', u'Greek',
       u'Hawaiian', u'Hungarian', u'Indian', u'Irish', u'Italian', u'Japanese',
       u'Kid-Friendly', u'Mediterranean', u'Mexican', u'Moroccan',
       u'Portuguese', u'Southern & Soul Food', u'Southwestern', u'Spanish',
       u'Swedish', u'Thai'],
      dtype='object')

In [25]:
print cdf2.shape
cdf2 = cdf2.drop('Unnamed: 0', 1)
cdf2 = cdf2.rename(columns = {'index':'id'})
print cdf2.columns
cdf2.head(3)


(500, 28)
Index([u'id', u'American', u'Asian', u'Barbecue', u'Cajun & Creole',
       u'Chinese', u'Cuban', u'English', u'French', u'German', u'Greek',
       u'Hawaiian', u'Hungarian', u'Indian', u'Irish', u'Italian', u'Japanese',
       u'Kid-Friendly', u'Mediterranean', u'Mexican', u'Moroccan',
       u'Portuguese', u'Southern & Soul Food', u'Southwestern', u'Spanish',
       u'Swedish', u'Thai'],
      dtype='object')
Out[25]:
id American Asian Barbecue Cajun & Creole Chinese Cuban English French German ... Kid-Friendly Mediterranean Mexican Moroccan Portuguese Southern & Soul Food Southwestern Spanish Swedish Thai
0 2-Minute-Avocado-Dip-1626429 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1 7-Layer-Flag-Dip-for-4th-of-July_-1636222 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2 Arugula-and-Chive-Baked-Egg-Cups-1632193 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0

3 rows × 27 columns


In [26]:
print cdf3.shape
cdf3 = cdf3.drop('Unnamed: 0', 1)
cdf3 = cdf3.rename(columns = {'index':'id'})
print cdf3.columns
cdf3.head(3)


(500, 28)
Index([u'id', u'American', u'Asian', u'Barbecue', u'Cajun & Creole',
       u'Chinese', u'Cuban', u'English', u'French', u'German', u'Greek',
       u'Hawaiian', u'Hungarian', u'Indian', u'Irish', u'Italian', u'Japanese',
       u'Kid-Friendly', u'Mediterranean', u'Mexican', u'Moroccan',
       u'Portuguese', u'Southern & Soul Food', u'Southwestern', u'Spanish',
       u'Swedish', u'Thai'],
      dtype='object')
Out[26]:
id American Asian Barbecue Cajun & Creole Chinese Cuban English French German ... Kid-Friendly Mediterranean Mexican Moroccan Portuguese Southern & Soul Food Southwestern Spanish Swedish Thai
0 3-Ingredient-Chili-Cheese-Dip-1004579 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
1 4-Ingredient-Cheese-Ball-1599980 0 0 0 0 0 0 0 0 0 ... 1 0 0 0 0 0 0 0 0 0
2 4-ingredient-Sweet-_-Spicy-Wings-1225519 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0

3 rows × 27 columns


In [28]:
#concatenate the cuisine tables. 
APP_cuisines= pd.concat([cdf, cdf1, cdf2, cdf3])

In [29]:
#peek at dataframe
print APP_cuisines.shape
APP_cuisines.head(3)


(2000, 27)
Out[29]:
id American Asian Barbecue Cajun & Creole Chinese Cuban English French German ... Kid-Friendly Mediterranean Mexican Moroccan Portuguese Southern & Soul Food Southwestern Spanish Swedish Thai
0 3-Ingredient-Campfire-Dip-1708222 0 0 1 0 0 0 0 0 0 ... 1 0 0 0 0 0 0 0 0 0
1 5-Ingredient-Jalapeno-Cheddar-Dip-1713067 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0
2 5-Ingredient-Salmon-Spread-with-Lemon_-Dill-an... 0 0 0 0 0 0 0 0 0 ... 0 0 0 0 0 0 0 0 0 0

3 rows × 27 columns


In [30]:
for i in APP_cuisines.duplicated('id'):
    if i == True:
        print i
  • Details

In [31]:
ddf = pd.read_csv('APP_details.csv')
ddf1 = pd.read_csv('APP_details_1.csv')
ddf2 = pd.read_csv('APP_details_2.csv')
ddf3 = pd.read_csv('APP_details_3.csv')

In [32]:
print ddf.shape
ddf = ddf.drop('Unnamed: 0', 1)
print ddf.columns
ddf.head(3)


(500, 7)
Index([u'id', u'cookTimeInSeconds', u'ingredientCount', u'numberOfServings',
       u'prepTimeInSeconds', u'totalTimeInSeconds'],
      dtype='object')
Out[32]:
id cookTimeInSeconds ingredientCount numberOfServings prepTimeInSeconds totalTimeInSeconds
0 Bacon-Cheddar-Pinwheels-768341 NaN 5 16 900.0 2100
1 Fiesta-Corn-Dip-1711704 NaN 7 4 900.0 900
2 Crunchy-Taco-Cups-1684027 NaN 5 12 NaN 2400

In [33]:
print ddf1.shape
ddf1 = ddf1.drop('Unnamed: 0', 1)
print ddf1.columns
ddf1.head(3)


(500, 7)
Index([u'id', u'cookTimeInSeconds', u'ingredientCount', u'numberOfServings',
       u'prepTimeInSeconds', u'totalTimeInSeconds'],
      dtype='object')
Out[33]:
id cookTimeInSeconds ingredientCount numberOfServings prepTimeInSeconds totalTimeInSeconds
0 Skinny-Hummus-With-Zero-Tahini-1627413 NaN 8 12 NaN 600.0
1 Pico-De-Gallo-1682898 NaN 8 4 NaN 1200.0
2 Fiesta-Baked-Cheese-Dip-1696540 2400.0 7 32 1200.0 3600.0

In [34]:
print ddf2.shape
ddf2 = ddf2.drop('Unnamed: 0', 1)
print ddf2.columns
ddf2.head(3)


(500, 7)
Index([u'id', u'cookTimeInSeconds', u'ingredientCount', u'numberOfServings',
       u'prepTimeInSeconds', u'totalTimeInSeconds'],
      dtype='object')
Out[34]:
id cookTimeInSeconds ingredientCount numberOfServings prepTimeInSeconds totalTimeInSeconds
0 Warm-Spinach-and-Artichoke-Dip-1222123 NaN 9 4 NaN 1800.0
1 Honey-Garlic-Crockpot-Meatballs-1245745 14400.0 6 7 300.0 14700.0
2 Cheesy-Spinach-_-Artichoke-Dip-1063719 NaN 6 4 NaN 2100.0

In [35]:
print ddf3.shape
ddf3 = ddf3.drop('Unnamed: 0', 1)
print ddf3.columns
ddf3.head(3)


(500, 7)
Index([u'id', u'cookTimeInSeconds', u'ingredientCount', u'numberOfServings',
       u'prepTimeInSeconds', u'totalTimeInSeconds'],
      dtype='object')
Out[35]:
id cookTimeInSeconds ingredientCount numberOfServings prepTimeInSeconds totalTimeInSeconds
0 Pizza-dip-333915 NaN 5 4 NaN 2100.0
1 The-Greatest-Hummus-Recipe-Ever_-1663684 NaN 7 5 900.0 900.0
2 Garlic-Herb-Cream-Cheese-Dip-972926 NaN 6 4 NaN 900.0

In [36]:
#concatenate the details tables. 
APP_details= pd.concat([ddf, ddf1, ddf2, ddf3])

In [37]:
#peek at dataframe
print APP_details.shape
APP_details.head(3)


(2000, 6)
Out[37]:
id cookTimeInSeconds ingredientCount numberOfServings prepTimeInSeconds totalTimeInSeconds
0 Bacon-Cheddar-Pinwheels-768341 NaN 5 16 900.0 2100.0
1 Fiesta-Corn-Dip-1711704 NaN 7 4 900.0 900.0
2 Crunchy-Taco-Cups-1684027 NaN 5 12 NaN 2400.0

In [38]:
for i in APP_details.duplicated('id'):
    if i == True:
        print i
  • Ingredients

In [39]:
idf = pd.read_csv('APP_ingredients.csv')
idf1 = pd.read_csv('APP_ingredients_1.csv')
idf2 = pd.read_csv('APP_ingredients_2.csv')
idf3 = pd.read_csv('APP_ingredients_3.csv')

In [40]:
print idf.shape
idf = idf.drop('Unnamed: 0', 1)
print idf.columns
idf.head(3)


(500, 758)
Index([u'id', u'course', u'(10 oz.) frozen spinach', u'agave nectar',
       u'aged balsamic vinegar', u'aioli', u'all-purpose flour', u'allspice',
       u'allspice berries', u'almonds',
       ...
       u'wonton wrappers', u'worcestershire sauce', u'yellow bell pepper',
       u'yellow corn', u'yellow mustard', u'yellow onion', u'yellow peppers',
       u'yoghurt', u'yoplait', u'zucchini'],
      dtype='object', length=757)
Out[40]:
id course (10 oz.) frozen spinach agave nectar aged balsamic vinegar aioli all-purpose flour allspice allspice berries almonds ... wonton wrappers worcestershire sauce yellow bell pepper yellow corn yellow mustard yellow onion yellow peppers yoghurt yoplait zucchini
0 Bacon-Cheddar-Pinwheels-768341 Breakfast and Brunch 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
1 Fiesta-Corn-Dip-1711704 Breakfast and Brunch 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2 Crunchy-Taco-Cups-1684027 Breakfast and Brunch 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0

3 rows × 757 columns


In [41]:
print idf1.shape
idf1 = idf1.drop('Unnamed: 0', 1)
print idf1.columns
idf1.head(3)


(500, 738)
Index([u'id', u'course', u'accent', u'adobo', u'adobo sauce',
       u'aged balsamic vinegar', u'albacore', u'ale', u'all-purpose flour',
       u'almonds',
       ...
       u'wish bone ranch dress', u'wish-bone light buffalo ranch dressing',
       u'wonton wrappers', u'worcestershire sauce', u'yellow corn',
       u'yellow mustard', u'yellow onion', u'yellow peppers', u'yoghurt',
       u'zucchini'],
      dtype='object', length=737)
Out[41]:
id course accent adobo adobo sauce aged balsamic vinegar albacore ale all-purpose flour almonds ... wish bone ranch dress wish-bone light buffalo ranch dressing wonton wrappers worcestershire sauce yellow corn yellow mustard yellow onion yellow peppers yoghurt zucchini
0 Skinny-Hummus-With-Zero-Tahini-1627413 Breakfast and Brunch 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
1 Pico-De-Gallo-1682898 Breakfast and Brunch 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2 Fiesta-Baked-Cheese-Dip-1696540 Breakfast and Brunch 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0

3 rows × 737 columns


In [42]:
print idf2.shape
idf2 = idf2.drop('Unnamed: 0', 1)
print idf2.columns
idf2.head(3)


(500, 746)
Index([u'id', u'course', u'(10 oz.) frozen spinach', u'adobo sauce',
       u'agave nectar', u'all-purpose flour', u'almond flour', u'almond milk',
       u'almonds', u'american cheese',
       ...
       u'worcestershire sauce', u'yellow mustard', u'yellow onion',
       u'yellow peppers', u'yoghurt', u'yoghurt natural low fat', u'za'atar',
       u'zesty italian dressing', u'zucchini', u'zucchini blossoms'],
      dtype='object', length=745)
Out[42]:
id course (10 oz.) frozen spinach adobo sauce agave nectar all-purpose flour almond flour almond milk almonds american cheese ... worcestershire sauce yellow mustard yellow onion yellow peppers yoghurt yoghurt natural low fat za'atar zesty italian dressing zucchini zucchini blossoms
0 Warm-Spinach-and-Artichoke-Dip-1222123 Breakfast and Brunch 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
1 Honey-Garlic-Crockpot-Meatballs-1245745 Breakfast and Brunch 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2 Cheesy-Spinach-_-Artichoke-Dip-1063719 Breakfast and Brunch 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0

3 rows × 745 columns


In [43]:
print idf3.shape
idf3 = idf3.drop('Unnamed: 0', 1)
print idf3.columns
idf3.head()


(500, 708)
Index([u'id', u'course', u'adobo', u'agave nectar', u'aged cheddar cheese',
       u'all-purpose flour', u'almond flour', u'almond milk', u'almonds',
       u'american cheese',
       ...
       u'wonton wrappers', u'worcestershire sauce', u'yellow mustard',
       u'yellow onion', u'yoghurt', u'yoplait', u'yukon gold potatoes',
       u'za'atar', u'zesty italian dressing', u'zucchini'],
      dtype='object', length=707)
Out[43]:
id course adobo agave nectar aged cheddar cheese all-purpose flour almond flour almond milk almonds american cheese ... wonton wrappers worcestershire sauce yellow mustard yellow onion yoghurt yoplait yukon gold potatoes za'atar zesty italian dressing zucchini
0 Pizza-dip-333915 Breakfast and Brunch 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
1 The-Greatest-Hummus-Recipe-Ever_-1663684 Breakfast and Brunch 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
2 Garlic-Herb-Cream-Cheese-Dip-972926 Breakfast and Brunch 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
3 Frank_s-RedHot-Buffalo-Chicken-Dip-1089684 Breakfast and Brunch 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
4 Marinated-Japanese-Mushrooms-1102960 Breakfast and Brunch 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0

5 rows × 707 columns


In [44]:
#concatenate the ingredient tables. 
APP_ing= pd.concat([idf, idf1, idf2, idf3])

In [45]:
APP_ing.head(3)


Out[45]:
(10 oz.) frozen spinach accent adobo adobo sauce agave nectar aged balsamic vinegar aged cheddar cheese aioli albacore ale ... yellow onion yellow peppers yoghurt yoghurt natural low fat yoplait yukon gold potatoes za'atar zesty italian dressing zucchini zucchini blossoms
0 0.0 NaN NaN NaN 0.0 0.0 NaN 0.0 NaN NaN ... 0.0 0.0 0.0 NaN 0.0 NaN NaN NaN 0.0 NaN
1 0.0 NaN NaN NaN 0.0 0.0 NaN 0.0 NaN NaN ... 0.0 0.0 0.0 NaN 0.0 NaN NaN NaN 0.0 NaN
2 0.0 NaN NaN NaN 0.0 0.0 NaN 0.0 NaN NaN ... 0.0 0.0 0.0 NaN 0.0 NaN NaN NaN 0.0 NaN

3 rows × 1438 columns


In [46]:
#drop unnamed column & make id first column
#APP_ing = APP_ing.drop('Unnamed: 0', 1)
cols = list(APP_ing)
cols.insert(0, cols.pop(cols.index('id')))
APP_ing = APP_ing.ix[:, cols]

In [47]:
APP_ing.head(3)


Out[47]:
id (10 oz.) frozen spinach accent adobo adobo sauce agave nectar aged balsamic vinegar aged cheddar cheese aioli albacore ... yellow onion yellow peppers yoghurt yoghurt natural low fat yoplait yukon gold potatoes za'atar zesty italian dressing zucchini zucchini blossoms
0 Bacon-Cheddar-Pinwheels-768341 0.0 NaN NaN NaN 0.0 0.0 NaN 0.0 NaN ... 0.0 0.0 0.0 NaN 0.0 NaN NaN NaN 0.0 NaN
1 Fiesta-Corn-Dip-1711704 0.0 NaN NaN NaN 0.0 0.0 NaN 0.0 NaN ... 0.0 0.0 0.0 NaN 0.0 NaN NaN NaN 0.0 NaN
2 Crunchy-Taco-Cups-1684027 0.0 NaN NaN NaN 0.0 0.0 NaN 0.0 NaN ... 0.0 0.0 0.0 NaN 0.0 NaN NaN NaN 0.0 NaN

3 rows × 1438 columns


In [48]:
for i in APP_ing.duplicated('id'):
    if i == True:
        print i

Join all tables for Appetizers


In [49]:
# set index to column 'id'
_df = [APP_main, APP_cuisines, APP_flavors, APP_details, APP_ing]

for df in _df:
    df.set_index('id', inplace = True)

In [50]:
# join dataframes
APP_data = APP_main.join([APP_cuisines, APP_flavors, APP_details, APP_ing])
# add course column
APP_data['course'] = 'Appetizer'

In [52]:
APP_data.shape
APP_data.head(3)


Out[52]:
rating recipeName sourceDisplayName American Asian Barbecue Cajun & Creole Chinese Cuban English ... yellow onion yellow peppers yoghurt yoghurt natural low fat yoplait yukon gold potatoes za'atar zesty italian dressing zucchini zucchini blossoms
id
Bacon-Cheddar-Pinwheels-768341 4 Bacon-Cheddar Pinwheels NaN 0 0 0 0 0 0 0 ... 0.0 0.0 0.0 NaN 0.0 NaN NaN NaN 0.0 NaN
Fiesta-Corn-Dip-1711704 4 Fiesta Corn Dip The Hill Hangout 0 0 0 0 0 0 0 ... 0.0 0.0 0.0 NaN 0.0 NaN NaN NaN 0.0 NaN
Crunchy-Taco-Cups-1684027 4 Crunchy Taco Cups Kevin and Amanda 0 0 0 0 0 0 0 ... 0.0 0.0 0.0 NaN 0.0 NaN NaN NaN 0.0 NaN

3 rows × 1477 columns


In [53]:
#save to csv
APP_data.to_csv('APP_data.csv')