In [1]:
#### Introduction to Data Wrangling with Pandas ####
## Page 4 ##

In [2]:
#### Merging Data Frames ####

In [19]:
# Pandas concat() and merge() come in handy here (DataFrame.append() was removed in pandas 2.0; use concat() instead)

In [3]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline 
#so that we can view the graphs inside the notebook

In [4]:
# First demo frame: columns A-D each hold the values 0..3, with row labels 0..3.
df1 = pd.DataFrame(
    {column: range(0, 4) for column in ['A', 'B', 'C', 'D']},
    index=[0, 1, 2, 3],
)

In [5]:
# Display df1 to check its contents.
df1


Out[5]:
A B C D
0 0 0 0 0
1 1 1 1 1
2 2 2 2 2
3 3 3 3 3

In [6]:
# Second demo frame: same columns as df1, values 4..7, with row labels 4..7.
df2 = pd.DataFrame(
    {column: range(4, 8) for column in ['A', 'B', 'C', 'D']},
    index=[4, 5, 6, 7],
)

In [7]:
# Display df2 to check its contents.
df2


Out[7]:
A B C D
4 4 4 4 4
5 5 5 5 5
6 6 6 6 6
7 7 7 7 7

In [8]:
# Stack df2 underneath df1 (row-wise concatenation, which is concat's default axis),
# then display the combined frame.
result = pd.concat(
    [df1, df2],
    axis='index',
)
result


Out[8]:
A B C D
0 0 0 0 0
1 1 1 1 1
2 2 2 2 2
3 3 3 3 3
4 4 4 4 4
5 5 5 5 5
6 6 6 6 6
7 7 7 7 7

In [9]:
# Same row-wise stack, but tag the rows of each input with a key so their origin
# stays visible in the index.
result = pd.concat(
    [df1, df2],
    axis='index',
    keys=['first', 'second'],
)

In [10]:
# The keys show up as an outer index level ('first' / 'second') above the original row labels.
result


Out[10]:
A B C D
first 0 0 0 0 0
1 1 1 1 1
2 2 2 2 2
3 3 3 3 3
second 4 4 4 4 4
5 5 5 5 5
6 6 6 6 6
7 7 7 7 7

In [11]:
# Selecting the outer index label 'second' returns only the rows that came
# from the second frame passed to concat, indexed by their original labels.
result.loc['second']


Out[11]:
A B C D
4 4 4 4 4
5 5 5 5 5
6 6 6 6 6
7 7 7 7 7

In [12]:
# Show df1 again for reference.
df1


Out[12]:
A B C D
0 0 0 0 0
1 1 1 1 1
2 2 2 2 2
3 3 3 3 3

In [13]:
# Third demo frame: only partly overlaps df1 -- it has columns B, D, F and
# row labels 2, 3, 6, 7, each column holding the values 0..3.
df3 = pd.DataFrame(
    {column: range(0, 4) for column in ['B', 'D', 'F']},
    index=[2, 3, 6, 7],
)

In [33]:
# Display df3 to check its contents.
df3


Out[33]:
B D F
2 0 0 0
3 1 1 1
6 2 2 2
7 3 3 3

In [14]:
# Row-wise stack of two frames whose columns only partly match; the default
# outer join keeps the union of the column names.
result = pd.concat(
    [df1, df3],
    axis='index',
)

In [35]:
# Columns absent from one frame are filled with NaN for that frame's rows
# (which is why A, C and F display as floats); the repeated index labels 2 and 3 are kept.
result


Out[35]:
A B C D F
0 0.0 0 0.0 0 NaN
1 1.0 1 1.0 1 NaN
2 2.0 2 2.0 2 NaN
3 3.0 3 3.0 3 NaN
2 NaN 0 NaN 0 0.0
3 NaN 1 NaN 1 1.0
6 NaN 2 NaN 2 2.0
7 NaN 3 NaN 3 3.0

In [15]:
# Show df1 again for reference.
df1


Out[15]:
A B C D
0 0 0 0 0
1 1 1 1 1
2 2 2 2 2
3 3 3 3 3

In [36]:
# Show df3 again for reference.
df3


Out[36]:
B D F
2 0 0 0
3 1 1 1
6 2 2 2
7 3 3 3

In [15]:
# Place df3's columns to the right of df1's, aligning rows on the index labels
# (outer join: every label from either frame is kept).
result = pd.concat(
    [df1, df3],
    axis='columns',
)

In [16]:
# Rows are aligned on index label; labels found in only one frame get NaN on the
# other side, and the shared column names B and D now appear twice.
result


Out[16]:
A B C D B D F
0 0.0 0.0 0.0 0.0 NaN NaN NaN
1 1.0 1.0 1.0 1.0 NaN NaN NaN
2 2.0 2.0 2.0 2.0 0.0 0.0 0.0
3 3.0 3.0 3.0 3.0 1.0 1.0 1.0
6 NaN NaN NaN NaN 2.0 2.0 2.0
7 NaN NaN NaN NaN 3.0 3.0 3.0

In [17]:
# Side-by-side concatenation again, but keep only the index labels that
# occur in both frames.
result = pd.concat(
    [df1, df3],
    axis='columns',
    join='inner',
)

In [18]:
# Only index labels 2 and 3 exist in both frames, so only those rows remain.
result


Out[18]:
A B C D B D F
2 2 2 2 2 0 0 0
3 3 3 3 3 1 1 1

In [20]:
# Row-wise stack that keeps only the columns common to both frames (B and D),
# then display the result.
result = pd.concat(
    [df1, df3],
    axis='index',
    join='inner',
)
result


Out[20]:
B D
0 0 0
1 1 1
2 2 2
3 3 3
2 0 0
3 1 1
6 2 2
7 3 3

In [23]:
# SQL-like join operations (inner, outer, left, right) on given keys
# https://pandas.pydata.org/pandas-docs/stable/comparison_with_sql.html

In [24]:
# The real power: merging datasets obtained from different sources.