In [2]:
import pandas as pd
import numpy as np
# Show the pandas and numpy versions this notebook is running against.
(pd.__version__, np.__version__)
Out[2]:
In [12]:
# Employee table: one row per employee; `company_id` holds the employer's id.
df_employees = pd.DataFrame(
    dict(
        id=[1, 2, 3, 4],
        name=['alice', 'bob', 'charlie', 'david'],
        company_id=[1, 2, 1, 2],
    )
)
df_employees
Out[12]:
In [14]:
# Company table: one row per company, identified by `id`.
df_companies = pd.DataFrame(
    dict(
        id=[1, 2],
        name=['bell labs', 'xerox'],
    )
)
df_companies
Out[14]:
In [22]:
# Salary history: one row per (employee, year), two years per employee.
df_employees_sal = pd.DataFrame(
    dict(
        id=[1, 1, 2, 2, 3, 3, 4, 4],
        name=[
            'alice', 'alice',
            'bob', 'bob',
            'charlie', 'charlie',
            'david', 'david',
        ],
        year=[1980, 1981, 1980, 1981, 1980, 1981, 1980, 1981],
        salary=[30000, 30000, 40000, 41000, 35000, 40000, 45000, 45000],
        company_id=[1, 1, 2, 2, 1, 1, 2, 2],
    )
)
# Display ordered by year, then by employee name (the frame itself is not modified).
df_employees_sal.sort_values(['year', 'name'])
Out[22]:
In [23]:
# Revenue history: one row per (company, year).
# NOTE(review): xerox revenue for 1981 (500000) is one tenth of its 1980
# value (5000000) -- confirm this is intended and not a dropped zero.
df_companies_rev = pd.DataFrame(
    dict(
        id=[1, 1, 2, 2],
        name=['bell labs', 'bell labs', 'xerox', 'xerox'],
        year=[1980, 1981, 1980, 1981],
        revenue=[1130000, 1130000, 5000000, 500000],
    )
)
# Display ordered by year, then by company name (the frame itself is not modified).
df_companies_rev.sort_values(['year', 'name'])
Out[23]:
In [24]:
# Attach each company's revenue for the matching year to every salary row,
# joining on (year, company_id) from the salary table and (year, id) from the
# revenue table.
# Both frames carry `id` and `name` columns, so explicit suffixes replace the
# default `_x`/`_y` and make clear which side each column came from.
# `validate` raises pandas.errors.MergeError if a (year, id) pair is repeated
# in df_companies_rev, which would otherwise silently duplicate salary rows.
pd.merge(
    df_employees_sal,
    df_companies_rev,
    left_on=['year', 'company_id'],
    right_on=['year', 'id'],
    suffixes=('_employee', '_company'),
    validate='many_to_one',
)
Out[24]:
In [ ]: