In [1]:
import numpy as np 
import pandas as pd
from pandas import Series,DataFrame

In [2]:
# Left-hand example frame: six rows whose 'key' values repeat (X three times,
# Z twice, Y once), paired with a running counter in 'data_set_1'.
dframe1 = DataFrame(
    data={
        'key': ['X', 'Z', 'Y', 'Z', 'X', 'X'],
        'data_set_1': np.arange(6),
    }
)

# Bare expression so the notebook renders the frame.
dframe1


Out[2]:
data_set_1 key
0 0 X
1 1 Z
2 2 Y
3 3 Z
4 4 X
5 5 X

In [5]:
# Right-hand example frame: one row per key. 'Q' does not occur in dframe1 and
# 'X' does not occur here, so the two frames only partially overlap.
dframe2 = DataFrame(
    data={
        'key': ['Q', 'Y', 'Z'],
        'data_set_2': np.arange(1, 4),  # values 1, 2, 3
    }
)

# Bare expression so the notebook renders the frame.
dframe2


Out[5]:
data_set_2 key
0 1 Q
1 2 Y
2 3 Z

In [6]:
# No `on=` is given, so pandas joins on the column(s) the two frames share
# (here only 'key'); the default join type is inner, so only matching keys remain.
pd.merge(left=dframe1, right=dframe2)


Out[6]:
data_set_1 key data_set_2
0 1 Z 3
1 3 Z 3
2 2 Y 2

In [7]:
# Same inner join as the previous cell, but with the join column spelled out.
pd.merge(left=dframe1, right=dframe2, on='key')


Out[7]:
data_set_1 key data_set_2
0 1 Z 3
1 3 Z 3
2 2 Y 2

In [8]:
# Left join: every row of dframe1 is kept; keys with no match in dframe2
# (the 'X' rows) get NaN in data_set_2.
pd.merge(left=dframe1, right=dframe2, how='left', on='key')


Out[8]:
data_set_1 key data_set_2
0 0 X NaN
1 1 Z 3.0
2 2 Y 2.0
3 3 Z 3.0
4 4 X NaN
5 5 X NaN

In [9]:
# Right join: every row of dframe2 is kept; its 'Q' key has no partner in
# dframe1, so data_set_1 is NaN for that row.
pd.merge(left=dframe1, right=dframe2, how='right', on='key')


Out[9]:
data_set_1 key data_set_2
0 1.0 Z 3
1 3.0 Z 3
2 2.0 Y 2
3 NaN Q 1

In [10]:
# Outer join: union of the keys from both frames; whichever side lacks a key
# contributes NaN for its data column.
pd.merge(left=dframe1, right=dframe2, how='outer', on='key')


Out[10]:
data_set_1 key data_set_2
0 0.0 X NaN
1 4.0 X NaN
2 5.0 X NaN
3 1.0 Z 3.0
4 3.0 Z 3.0
5 2.0 Y 2.0
6 NaN Q 1.0

In [11]:
# Frame with repeated keys (X three times, Y once, Z twice) for the many-to-many merge.
dframe3 = DataFrame(
    data={
        'key': ['X', 'X', 'X', 'Y', 'Z', 'Z'],
        'data_set_3': range(6),
    }
)

In [12]:
# Second frame with repeated keys (Y twice, X twice, Z once) for the many-to-many merge.
dframe4 = DataFrame(
    data={
        'key': ['Y', 'Y', 'X', 'X', 'Z'],
        'data_set_4': range(5),
    }
)

In [13]:
# Many-to-many inner join on the shared 'key' column: each row on one side is
# paired with every matching row on the other (e.g. 3 X-rows x 2 X-rows = 6 rows).
pd.merge(left=dframe3, right=dframe4)


Out[13]:
data_set_3 key data_set_4
0 0 X 2
1 0 X 3
2 1 X 2
3 1 X 3
4 2 X 2
5 2 X 3
6 3 Y 0
7 3 Y 1
8 4 Z 4
9 5 Z 4

In [14]:
# Left frame for the multi-key examples: rows are identified by (key1, key2).
df_left = DataFrame(
    data={
        'key1': ['SF', 'SF', 'LA'],
        'key2': ['one', 'two', 'three'],
        'left_data': [10, 20, 30],
    }
)

In [15]:
# Right frame for the multi-key examples: rows are identified by (key1, key2).
# NOTE(review): the value column is also called 'left_data' although this is the
# right-hand frame. The merged outputs recorded in this notebook
# (left_data_x / left_data_y) depend on that name, so it is kept as-is here --
# confirm whether 'right_data' was intended.
df_right = DataFrame(
    data={
        'key1': ['SF', 'SF', 'LA', 'LA'],
        'key2': ['one', 'one', 'one', 'three'],
        'left_data': [40, 50, 60, 79],
    }
)

In [16]:
# Outer join on the (key1, key2) pair: rows match only when both keys agree.
# Both frames carry a 'left_data' column, so pandas appends its default
# '_x' / '_y' suffixes to tell the two apart.
pd.merge(
    left=df_left,
    right=df_right,
    how='outer',
    on=['key1', 'key2'],
)


Out[16]:
key1 key2 left_data_x left_data_y
0 SF one 10.0 40.0
1 SF one 10.0 50.0
2 SF two 20.0 NaN
3 LA three 30.0 79.0
4 LA one NaN 60.0

In [17]:
# Outer join on key1 only: key2 is now an ordinary overlapping column, so it is
# duplicated with the default '_x' / '_y' suffixes alongside left_data.
pd.merge(
    left=df_left,
    right=df_right,
    how='outer',
    on=['key1'],
)


Out[17]:
key1 key2_x left_data_x key2_y left_data_y
0 SF one 10 one 40
1 SF one 10 one 50
2 SF two 20 one 40
3 SF two 20 one 50
4 LA three 30 one 60
5 LA three 30 three 79

In [18]:
# Same join as the previous cell, but with custom suffixes replacing the
# default '_x' / '_y' on the overlapping columns.
pd.merge(
    left=df_left,
    right=df_right,
    how='outer',
    on=['key1'],
    suffixes=('_lefty', '_righty'),
)


Out[18]:
key1 key2_lefty left_data_lefty key2_righty left_data_righty
0 SF one 10 one 40
1 SF one 10 one 50
2 SF two 20 one 40
3 SF two 20 one 50
4 LA three 30 one 60
5 LA three 30 three 79

In [ ]: