In [1]:
    
import pandas as pd
import numpy as np
    
In [2]:
    
# Load the four CSV chunks of the "main" table, in file order.
df, df1, df2, df3 = [
    pd.read_csv(csv_name)
    for csv_name in ('APP_main.csv', 'APP_main_1.csv', 'APP_main_2.csv', 'APP_main_3.csv')
]
    
In [3]:
    
# Print df1's (rows, columns) and preview the first rows of df3.
# NOTE(review): the shape is df1's while the preview is df3's — confirm this mix is intended.
print(df1.shape)  # call form prints the same on Python 2 and Python 3
df3.head()
    
    
    Out[3]:
In [4]:
    
# Remove the 'Unnamed: 0' column from the first main chunk.
# The axis is given by keyword because newer pandas rejects it as a positional argument.
df = df.drop('Unnamed: 0', axis=1)
    
In [5]:
    
# Print df1's shape as loaded, remove the 'Unnamed: 0' column, then preview 3 rows.
print(df1.shape)  # call form prints the same on Python 2 and Python 3
df1 = df1.drop('Unnamed: 0', axis=1)  # axis by keyword: positional form fails on newer pandas
df1.head(3)
    
    
    Out[5]:
In [6]:
    
# Print df2's shape as loaded, remove the 'Unnamed: 0' column, then preview 3 rows.
print(df2.shape)  # call form prints the same on Python 2 and Python 3
df2 = df2.drop('Unnamed: 0', axis=1)  # axis by keyword: positional form fails on newer pandas
df2.head(3)
    
    
    Out[6]:
In [7]:
    
# Print df3's shape as loaded, remove the 'Unnamed: 0' column, then preview 3 rows.
print(df3.shape)  # call form prints the same on Python 2 and Python 3
df3 = df3.drop('Unnamed: 0', axis=1)  # axis by keyword: positional form fails on newer pandas
df3.head(3)
    
    
    Out[7]:
In [8]:
    
# Stack the four main chunks row-wise into a single table.
# Original row labels are kept, so index values may repeat across chunks.
APP_main = pd.concat([df, df1, df2, df3], axis=0)
    
In [9]:
    
# Print the combined table's shape, then show how many rows repeat an 'id'
# that already appeared in an earlier row.
print(APP_main.shape)  # call form prints the same on Python 2 and Python 3
APP_main['id'].duplicated().sum()
    
    
    Out[9]:
In [11]:
    
# Print True once for every row of APP_main whose 'id' already occurred in an
# earlier row; no output means no repeated ids.
for is_dup in APP_main.duplicated('id'):
    if is_dup:
        print(is_dup)  # call form prints the same on Python 2 and Python 3
    
In [12]:
    
# Load the four CSV chunks of the flavors table, in file order.
fdf, fdf1, fdf2, fdf3 = [
    pd.read_csv(csv_name)
    for csv_name in ('APP_flavors.csv', 'APP_flavors_1.csv', 'APP_flavors_2.csv', 'APP_flavors_3.csv')
]
    
In [13]:
    
# Print fdf's shape as loaded, remove the 'Unnamed: 0' column, rename the
# 'index' column to 'id', then preview 3 rows.
print(fdf.shape)  # call form prints the same on Python 2 and Python 3
fdf = fdf.drop('Unnamed: 0', axis=1)  # axis by keyword: positional form fails on newer pandas
fdf = fdf.rename(columns={'index': 'id'})
fdf.head(3)
    
    
    Out[13]:
In [14]:
    
# Print fdf1's shape as loaded, remove the 'Unnamed: 0' column, then preview 3 rows.
print(fdf1.shape)  # call form prints the same on Python 2 and Python 3
fdf1 = fdf1.drop('Unnamed: 0', axis=1)  # axis by keyword: positional form fails on newer pandas
fdf1.head(3)
    
    
    Out[14]:
In [15]:
    
# Rename the 'index' column of fdf1 to 'id', then list the resulting column names.
key_rename = {'index': 'id'}
fdf1 = fdf1.rename(columns=key_rename)
fdf1.columns
    
    Out[15]:
In [16]:
    
# Print fdf2's shape as loaded, remove the 'Unnamed: 0' column, rename the
# 'index' column to 'id', then preview 3 rows.
print(fdf2.shape)  # call form prints the same on Python 2 and Python 3
fdf2 = fdf2.drop('Unnamed: 0', axis=1)  # axis by keyword: positional form fails on newer pandas
fdf2 = fdf2.rename(columns={'index': 'id'})
fdf2.head(3)
    
    
    Out[16]:
In [17]:
    
# Print fdf3's shape as loaded, remove the 'Unnamed: 0' column, rename the
# 'index' column to 'id', then preview 3 rows.
print(fdf3.shape)  # call form prints the same on Python 2 and Python 3
fdf3 = fdf3.drop('Unnamed: 0', axis=1)  # axis by keyword: positional form fails on newer pandas
fdf3 = fdf3.rename(columns={'index': 'id'})
fdf3.head(3)
    
    
    Out[17]:
In [18]:
    
# Stack the four flavor chunks row-wise into a single table.
# Original row labels are kept, so index values may repeat across chunks.
APP_flavors = pd.concat([fdf, fdf1, fdf2, fdf3], axis=0)
    
In [19]:
    
# Print the combined flavors table's shape, then preview its first 3 rows.
print(APP_flavors.shape)  # call form prints the same on Python 2 and Python 3
APP_flavors.head(3)
    
    
    Out[19]:
In [27]:
    
# Print True once for every row of APP_flavors whose 'id' already occurred in an
# earlier row; no output means no repeated ids.
for is_dup in APP_flavors.duplicated('id'):
    if is_dup:
        print(is_dup)  # call form prints the same on Python 2 and Python 3
    
In [21]:
    
# Load the four CSV chunks of the cuisines table, in file order.
cdf, cdf1, cdf2, cdf3 = [
    pd.read_csv(csv_name)
    for csv_name in ('APP_cuisines.csv', 'APP_cuisines_1.csv', 'APP_cuisines_2.csv', 'APP_cuisines_3.csv')
]
    
In [22]:
    
# Print cdf's shape as loaded, remove the 'Unnamed: 0' column, rename the
# 'index' column to 'id', print the resulting column names, then preview 3 rows.
print(cdf.shape)  # call form prints the same on Python 2 and Python 3
cdf = cdf.drop('Unnamed: 0', axis=1)  # axis by keyword: positional form fails on newer pandas
cdf = cdf.rename(columns={'index': 'id'})
print(cdf.columns)
cdf.head(3)
    
    
    Out[22]:
In [23]:
    
# Print cdf1's shape as loaded, remove the 'Unnamed: 0' column, print the
# remaining column names, then preview 3 rows.
print(cdf1.shape)  # call form prints the same on Python 2 and Python 3
cdf1 = cdf1.drop('Unnamed: 0', axis=1)  # axis by keyword: positional form fails on newer pandas
print(cdf1.columns)
cdf1.head(3)
    
    
    Out[23]:
In [24]:
    
# Rename the 'index' column of cdf1 to 'id', then list the resulting column names.
key_rename = {'index': 'id'}
cdf1 = cdf1.rename(columns=key_rename)
cdf1.columns
    
    Out[24]:
In [25]:
    
# Print cdf2's shape as loaded, remove the 'Unnamed: 0' column, rename the
# 'index' column to 'id', print the resulting column names, then preview 3 rows.
print(cdf2.shape)  # call form prints the same on Python 2 and Python 3
cdf2 = cdf2.drop('Unnamed: 0', axis=1)  # axis by keyword: positional form fails on newer pandas
cdf2 = cdf2.rename(columns={'index': 'id'})
print(cdf2.columns)
cdf2.head(3)
    
    
    Out[25]:
In [26]:
    
# Print cdf3's shape as loaded, remove the 'Unnamed: 0' column, rename the
# 'index' column to 'id', print the resulting column names, then preview 3 rows.
print(cdf3.shape)  # call form prints the same on Python 2 and Python 3
cdf3 = cdf3.drop('Unnamed: 0', axis=1)  # axis by keyword: positional form fails on newer pandas
cdf3 = cdf3.rename(columns={'index': 'id'})
print(cdf3.columns)
cdf3.head(3)
    
    
    Out[26]:
In [28]:
    
# Stack the four cuisine chunks row-wise into a single table.
# Original row labels are kept, so index values may repeat across chunks.
APP_cuisines = pd.concat([cdf, cdf1, cdf2, cdf3], axis=0)
    
In [29]:
    
# Print the combined cuisines table's shape, then preview its first 3 rows.
print(APP_cuisines.shape)  # call form prints the same on Python 2 and Python 3
APP_cuisines.head(3)
    
    
    Out[29]:
In [30]:
    
# Print True once for every row of APP_cuisines whose 'id' already occurred in an
# earlier row; no output means no repeated ids.
for is_dup in APP_cuisines.duplicated('id'):
    if is_dup:
        print(is_dup)  # call form prints the same on Python 2 and Python 3
    
In [31]:
    
# Load the four CSV chunks of the details table, in file order.
ddf, ddf1, ddf2, ddf3 = [
    pd.read_csv(csv_name)
    for csv_name in ('APP_details.csv', 'APP_details_1.csv', 'APP_details_2.csv', 'APP_details_3.csv')
]
    
In [32]:
    
# Print ddf's shape as loaded, remove the 'Unnamed: 0' column, print the
# remaining column names, then preview 3 rows.
print(ddf.shape)  # call form prints the same on Python 2 and Python 3
ddf = ddf.drop('Unnamed: 0', axis=1)  # axis by keyword: positional form fails on newer pandas
print(ddf.columns)
ddf.head(3)
    
    
    Out[32]:
In [33]:
    
# Print ddf1's shape as loaded, remove the 'Unnamed: 0' column, print the
# remaining column names, then preview 3 rows.
print(ddf1.shape)  # call form prints the same on Python 2 and Python 3
ddf1 = ddf1.drop('Unnamed: 0', axis=1)  # axis by keyword: positional form fails on newer pandas
print(ddf1.columns)
ddf1.head(3)
    
    
    Out[33]:
In [34]:
    
# Print ddf2's shape as loaded, remove the 'Unnamed: 0' column, print the
# remaining column names, then preview 3 rows.
print(ddf2.shape)  # call form prints the same on Python 2 and Python 3
ddf2 = ddf2.drop('Unnamed: 0', axis=1)  # axis by keyword: positional form fails on newer pandas
print(ddf2.columns)
ddf2.head(3)
    
    
    Out[34]:
In [35]:
    
# Print ddf3's shape as loaded, remove the 'Unnamed: 0' column, print the
# remaining column names, then preview 3 rows.
print(ddf3.shape)  # call form prints the same on Python 2 and Python 3
ddf3 = ddf3.drop('Unnamed: 0', axis=1)  # axis by keyword: positional form fails on newer pandas
print(ddf3.columns)
ddf3.head(3)
    
    
    Out[35]:
In [36]:
    
# Stack the four detail chunks row-wise into a single table.
# Original row labels are kept, so index values may repeat across chunks.
APP_details = pd.concat([ddf, ddf1, ddf2, ddf3], axis=0)
    
In [37]:
    
# Print the combined details table's shape, then preview its first 3 rows.
print(APP_details.shape)  # call form prints the same on Python 2 and Python 3
APP_details.head(3)
    
    
    Out[37]:
In [38]:
    
# Print True once for every row of APP_details whose 'id' already occurred in an
# earlier row; no output means no repeated ids.
for is_dup in APP_details.duplicated('id'):
    if is_dup:
        print(is_dup)  # call form prints the same on Python 2 and Python 3
    
In [39]:
    
# Load the four CSV chunks of the ingredients table, in file order.
idf, idf1, idf2, idf3 = [
    pd.read_csv(csv_name)
    for csv_name in ('APP_ingredients.csv', 'APP_ingredients_1.csv', 'APP_ingredients_2.csv', 'APP_ingredients_3.csv')
]
    
In [40]:
    
# Print idf's shape as loaded, remove the 'Unnamed: 0' column, print the
# remaining column names, then preview 3 rows.
print(idf.shape)  # call form prints the same on Python 2 and Python 3
idf = idf.drop('Unnamed: 0', axis=1)  # axis by keyword: positional form fails on newer pandas
print(idf.columns)
idf.head(3)
    
    
    Out[40]:
In [41]:
    
# Print idf1's shape as loaded, remove the 'Unnamed: 0' column, print the
# remaining column names, then preview 3 rows.
print(idf1.shape)  # call form prints the same on Python 2 and Python 3
idf1 = idf1.drop('Unnamed: 0', axis=1)  # axis by keyword: positional form fails on newer pandas
print(idf1.columns)
idf1.head(3)
    
    
    Out[41]:
In [42]:
    
# Print idf2's shape as loaded, remove the 'Unnamed: 0' column, print the
# remaining column names, then preview 3 rows.
print(idf2.shape)  # call form prints the same on Python 2 and Python 3
idf2 = idf2.drop('Unnamed: 0', axis=1)  # axis by keyword: positional form fails on newer pandas
print(idf2.columns)
idf2.head(3)
    
    
    Out[42]:
In [43]:
    
# Print idf3's shape as loaded, remove the 'Unnamed: 0' column, print the
# remaining column names, then preview the first rows.
print(idf3.shape)  # call form prints the same on Python 2 and Python 3
idf3 = idf3.drop('Unnamed: 0', axis=1)  # axis by keyword: positional form fails on newer pandas
print(idf3.columns)
idf3.head()
    
    
    Out[43]:
In [44]:
    
# Stack the four ingredient chunks row-wise into a single table.
# Original row labels are kept, so index values may repeat across chunks.
APP_ing = pd.concat([idf, idf1, idf2, idf3], axis=0)
    
In [45]:
    
# Preview the first 3 rows of the combined ingredients table.
APP_ing.iloc[:3]
    
    Out[45]:
In [46]:
    
# Move the 'id' column to the front of APP_ing; all other columns keep their
# relative order.
cols = list(APP_ing)  # iterating a DataFrame yields its column labels
cols.insert(0, cols.pop(cols.index('id')))
# .loc is the label-based selector; the .ix indexer used previously no longer
# exists in current pandas.
APP_ing = APP_ing.loc[:, cols]
    
In [47]:
    
# Preview the first 3 rows of APP_ing again.
APP_ing.iloc[:3]
    
    Out[47]:
In [48]:
    
# Print True once for every row of APP_ing whose 'id' already occurred in an
# earlier row; no output means no repeated ids.
for is_dup in APP_ing.duplicated('id'):
    if is_dup:
        print(is_dup)  # call form prints the same on Python 2 and Python 3
    
In [49]:
    
# Make 'id' the index of every combined table (modified in place, so the
# APP_* names keep pointing at the indexed frames).
_df = [APP_main, APP_cuisines, APP_flavors, APP_details, APP_ing]
# A dedicated loop name is used so the `df` frame loaded earlier is not rebound.
for _frame in _df:
    # Frames already indexed by 'id' are skipped, so re-running this cell
    # does not raise KeyError on the now-missing 'id' column.
    if _frame.index.name != 'id':
        _frame.set_index('id', inplace=True)
    
In [50]:
    
# Join the cuisines, flavors, details and ingredients tables onto APP_main
# by index, then add a constant 'course' column set to 'Appetizer'.
other_tables = [APP_cuisines, APP_flavors, APP_details, APP_ing]
APP_data = APP_main.join(other_tables).assign(course='Appetizer')
    
In [52]:
    
# Print the joined table's shape, then preview its first 3 rows.
# The shape must be printed explicitly: a notebook cell only displays its
# last expression, so a bare `APP_data.shape` above `head(3)` shows nothing.
print(APP_data.shape)
APP_data.head(3)
    
    Out[52]:
In [53]:
    
# Write the joined table to 'APP_data.csv', including its index as the first column.
APP_data.to_csv('APP_data.csv', index=True)