In [1]:
import numpy as np
import pandas as pd
from pandas import Series,DataFrame
In [3]:
# Left-hand frame for the basic merge examples: six rows whose 'key'
# column contains repeated labels, plus an integer payload column.
df1 = DataFrame(dict(key=list('XZYZXX'), data_set_1=np.arange(6)))
df1
Out[3]:
In [4]:
# Right-hand frame for the basic merge examples: every key appears once.
df2 = DataFrame(dict(key=['Q', 'Y', 'Z'], data_set_2=[1, 2, 3]))
df2
Out[4]:
In [5]:
# No `on=` argument: pandas joins on the column names the two frames
# have in common and performs an inner join.
pd.merge(left=df1, right=df2)
Out[5]:
In [6]:
# Name the join column explicitly. Because 'key' is the only column the
# two frames share, this gives the same result as pd.merge(df1, df2).
pd.merge(left=df1, right=df2, on='key')
Out[6]:
In [7]:
# Left join: every row of df1 is kept; keys with no match in df2 get NaN
# in the columns that come from df2.
pd.merge(left=df1, right=df2, how='left', on='key')
Out[7]:
In [8]:
# Right join: every row of df2 is kept; keys with no match in df1 get NaN
# in the columns that come from df1.
pd.merge(left=df1, right=df2, how='right', on='key')
Out[8]:
In [9]:
# Outer join: the union of keys from both frames, with NaN wherever one
# side has no matching row.
pd.merge(left=df1, right=df2, how='outer', on='key')
Out[9]:
In [11]:
# First frame for the many-to-many merge: keys repeat on this side.
df3 = DataFrame(dict(key=list('XXXYZZ'), data_set_3=range(6)))
df3
Out[11]:
In [12]:
# Second frame for the many-to-many merge: keys repeat on this side too.
df4 = DataFrame(dict(key=list('YYXXZ'), data_set_4=range(5)))
df4
Out[12]:
In [13]:
# Many-to-many merge: each key yields one output row for every pairing of
# a matching df3 row with a matching df4 row.
pd.merge(left=df3, right=df4)
Out[13]:
In [15]:
# Left frame for the multi-key examples: rows are identified by the
# (key1, key2) pair.
df_left = DataFrame(
    dict(
        key1=['SF', 'SF', 'LA'],
        key2=['one', 'two', 'one'],
        left_data=[10, 20, 30],
    )
)
df_left
Out[15]:
In [16]:
# Right frame for the multi-key examples; the ('SF', 'one') pair occurs twice.
df_right = DataFrame(
    dict(
        key1=['SF', 'SF', 'LA', 'LA'],
        key2=['one', 'one', 'one', 'two'],
        right_data=[40, 50, 60, 70],
    )
)
df_right
Out[16]:
In [17]:
# Outer join on both key columns: rows match only when key1 AND key2 agree;
# pairs present on just one side are kept with NaN for the other side's data.
pd.merge(left=df_left, right=df_right, how='outer', on=['key1', 'key2'])
Out[17]:
In [19]:
# Joining on 'key1' only leaves 'key2' as a non-key column present in both
# frames. By default pandas keeps both copies and tells them apart with the
# suffixes '_x' (left) and '_y' (right).
pd.merge(left=df_left, right=df_right, on='key1')
Out[19]:
In [21]:
# Replace the default suffixes for the overlapping 'key2' column with
# custom ones: the first applies to the left frame, the second to the right.
pd.merge(
    left=df_left,
    right=df_right,
    on='key1',
    suffixes=('_lefty', '_righty'),
)
Out[21]:
In [ ]: