In [2]:
import pandas as pd
import numpy as np

pd.__version__, np.__version__


Out[2]:
('0.24.2', '1.16.3')

In [12]:
df_employees = pd.DataFrame({
    'id':[1,2,3,4],
    'name':['alice','bob','charlie','david'],
    'company_id':[1,2,1,2]
})
df_employees


Out[12]:
id name company_id
0 1 alice 1
1 2 bob 2
2 3 charlie 1
3 4 david 2

In [14]:
df_companies = pd.DataFrame({
    'id':[1,2],
    'name': ['bell labs', 'xerox']
})
df_companies


Out[14]:
id name
0 1 bell labs
1 2 xerox

merge on multiple columns


In [22]:
df_employees_sal = pd.DataFrame({
    'id':[1,1,2,2,3,3,4,4],
    'name':['alice','alice','bob','bob','charlie','charlie','david','david'],
    'year':[1980,1981,1980,1981,1980,1981,1980,1981],
    'salary':[30000,30000,40000,41000,35000,40000,45000,45000],
    'company_id':[1,1,2,2,1,1,2,2]
})
df_employees_sal.sort_values(by=['year','name'])


Out[22]:
id name year salary company_id
0 1 alice 1980 30000 1
2 2 bob 1980 40000 2
4 3 charlie 1980 35000 1
6 4 david 1980 45000 2
1 1 alice 1981 30000 1
3 2 bob 1981 41000 2
5 3 charlie 1981 40000 1
7 4 david 1981 45000 2

In [23]:
df_companies_rev = pd.DataFrame({
    'id':[1,1,2,2],
    'name':['bell labs','bell labs','xerox','xerox'],
    'year':[1980,1981,1980,1981],
    'revenue':[1130000,1130000,5000000,500000]
})
df_companies_rev.sort_values(by=['year','name'])


Out[23]:
id name year revenue
0 1 bell labs 1980 1130000
2 2 xerox 1980 5000000
1 1 bell labs 1981 1130000
3 2 xerox 1981 500000

In [24]:
pd.merge(
    df_employees_sal,
    df_companies_rev,
    left_on=['year','company_id'],
    right_on=['year','id']
)


Out[24]:
id_x name_x year salary company_id id_y name_y revenue
0 1 alice 1980 30000 1 1 bell labs 1130000
1 3 charlie 1980 35000 1 1 bell labs 1130000
2 1 alice 1981 30000 1 1 bell labs 1130000
3 3 charlie 1981 40000 1 1 bell labs 1130000
4 2 bob 1980 40000 2 2 xerox 5000000
5 4 david 1980 45000 2 2 xerox 5000000
6 2 bob 1981 41000 2 2 xerox 500000
7 4 david 1981 45000 2 2 xerox 500000

In [ ]: