In [6]:
import pandas as pd;
# Toy economic tables used by the merge/join examples.
# df1 and df2 share the same columns and values but cover different years
# (2001-2004 vs. 2005-2008); df3 shares only the HPI column with df1 and
# reuses df1's year index.
df1 = pd.DataFrame(
    data={
        "HPI": [80, 85, 88, 85],
        "Int_rate": [2, 3, 2, 2],
        "US_GDP_Thousands": [50, 55, 65, 55],
    },
    index=[2001, 2002, 2003, 2004],
)

df2 = pd.DataFrame(
    data={
        "HPI": [80, 85, 88, 85],
        "Int_rate": [2, 3, 2, 2],
        "US_GDP_Thousands": [50, 55, 65, 55],
    },
    index=[2005, 2006, 2007, 2008],
)

df3 = pd.DataFrame(
    data={
        "HPI": [80, 85, 88, 85],
        "Unemployment": [7, 8, 9, 6],
        "Low_tier_HPI": [50, 52, 50, 53],
    },
    index=[2001, 2002, 2003, 2004],
)
In [12]:
# Inner-merge df1 with df2 on the HPI column -- pandas' counterpart of a SQL JOIN.
# The original row indexes are discarded, and the non-key columns that exist in
# both frames come back with _x / _y suffixes.
print(df1.merge(df2, on="HPI"))
In [13]:
# Merge on a composite key: rows pair up only when both HPI and Int_rate agree.
# US_GDP_Thousands is the only remaining shared column, so it alone is suffixed.
print(df1.merge(df2, on=["HPI", "Int_rate"]))
In [15]:
# join() aligns on the index, so promote HPI to the index of both frames first.
df1_indexed = df1.set_index("HPI")
df3_indexed = df3.set_index("HPI")

# Index-based join: HPI stays as the index of the result.
joindex = df1_indexed.join(df3_indexed)
print(joindex)

# Column-based merge on the same key for comparison: HPI stays a regular column.
print(df1.merge(df3, on="HPI"))
In [17]:
# Two tables keyed by an explicit Year column; their year sets only partly
# overlap (2002 exists only in ndf1, 2005 only in ndf2).
ndf1 = pd.DataFrame(
    {
        "Year": [2001, 2002, 2003, 2004],
        "Int_rate": [2, 3, 2, 2],
        "US_GDP_Thousands": [50, 55, 65, 55],
    }
)
ndf2 = pd.DataFrame(
    {
        "Year": [2001, 2003, 2004, 2005],
        "Unemployment": [7, 8, 9, 6],
        "Low_tier_HPI": [50, 52, 50, 53],
    }
)

# Default (inner) merge keeps only the years present in both tables,
# then Year is promoted to the index.
merged = ndf1.merge(ndf2, on="Year").set_index("Year")
print(merged)
In [18]:
# Left merge: every Year from ndf1 is kept; years with no match in ndf2
# get NaN in the columns that come from ndf2.
merged = ndf1.merge(ndf2, on="Year", how="left").set_index("Year")
print(merged)
In [19]:
# Right merge: every Year from ndf2 is kept; years with no match in ndf1
# get NaN in the columns that come from ndf1.
merged = ndf1.merge(ndf2, on="Year", how="right").set_index("Year")
print(merged)
In [20]:
# Outer merge: the union of the years from both tables; whichever side lacks
# a given year contributes NaN for its columns.
merged = ndf1.merge(ndf2, on="Year", how="outer").set_index("Year")
print(merged)
In [21]:
# Inner merge, spelled out explicitly: only years found in both tables survive.
# "inner" is the default value of `how`, so this matches a merge without it.
merged = ndf1.merge(ndf2, on="Year", how="inner").set_index("Year")
print(merged)
In [ ]:
# Use merge when the index doesn't matter to you, and join when it does.