In [ ]:
import numpy as np
import pandas as pd
import matplotlib as plt
%matplotlib inline
# set the default display row count
# pd.options.display.max_rows=100
In [ ]:
# Flat array of the 48 labels 'A0', 'B0', 'C0', 'D0', 'A1', ..., 'D11'.
data = np.array([
    letter + str(number)
    for number in range(12)
    for letter in 'ABCD'
])
print('type(data): ', type(data))
# Fold the flat array into 12 rows of 4 labels, one row per number.
data = data.reshape((12, 4))
print('data.shape: ', data.shape)
In [ ]:
# Cut the label array into three consecutive 4-row frames sharing the
# column names A..D; each frame gets its own default 0..3 row index.
df1, df2, df3 = (
    pd.DataFrame(data[start:start + 4], columns=list('ABCD'))
    for start in (0, 4, 8)
)
In [ ]:
# Display the first frame (built from rows 0-3 of `data`).
df1
In [ ]:
# Display the second frame (built from rows 4-7 of `data`).
df2
In [ ]:
# Display the third frame (built from rows 8-11 of `data`).
df3
In [ ]:
# Stack the three frames on top of each other; ignore_index=True discards
# the repeated 0..3 row labels and numbers the result 0..n-1 instead.
res1 = pd.concat(
    [df1, df2, df3],
    axis=0,
    ignore_index=True,
)
res1
In [ ]:
# Label each input frame with its own key; the keys become the outer level
# of a two-level row index. There must be exactly one key per frame: with
# only two keys for three frames, pandas truncated to the shorter sequence
# and silently dropped df3 (newer versions warn and are slated to raise).
# Keys only take effect with ignore_index=False (the default), because
# ignore_index=True replaces the row labels with 0..n-1.
res2 = pd.concat([df1, df2, df3], keys=['x', 'y', 'z'], ignore_index=False)
res2
In [ ]:
# Select the rows stored under the outer key 'x' of res2's two-level index.
res2.loc['x']
In [ ]:
# A frame with row labels 2, 3, 6, 7 and columns B, D, E, holding the
# labels 'B2', 'D2', 'E2', ..., 'E7' (one row per number).
data4 = np.array([
    letter + str(label)
    for label in (2, 3, 6, 7)
    for letter in 'BDE'
]).reshape((4, 3))
df4 = pd.DataFrame(data4, columns=list('BDE'), index=[2, 3, 6, 7])
df4
In [ ]:
# Side-by-side concatenation. join='outer' is the default: the result keeps
# the union of both row indexes and fills the gaps with NaN.
res3 = pd.concat([df1, df4], axis='columns', join='outer')
res3
In [ ]:
# Side-by-side concatenation keeping only the row labels that occur in
# both frames (intersection of the indexes).
res4 = pd.concat(
    [df1, df4],
    join='inner',
    axis='columns',
)
res4
In [ ]:
# Side-by-side concatenation aligned on df1's row labels only.
# `join_axes` was deprecated in pandas 0.25 and removed in 1.0; the
# documented replacement is to concatenate (outer join) and then reindex
# the result to the labels that should be kept.
res5 = pd.concat([df1, df4], axis=1).reindex(df1.index)
res5
In [ ]:
# Same df1-aligned result, but ignore_index=True (with axis=1) replaces the
# column labels with 0..n-1. `join_axes` was removed in pandas 1.0, so df4
# is reindexed to df1's row labels before concatenating instead.
res6 = pd.concat([df1, df4.reindex(df1.index)], axis=1, ignore_index=True)
res6
In [ ]:
# Append df2's rows below df1's. DataFrame.append was deprecated in pandas
# 1.4 and removed in 2.0; pd.concat is the replacement. Row labels are kept
# as they are, so the labels 0..3 occur twice in the result.
app1 = pd.concat([df1, df2])
app1
In [ ]:
# Rows are stacked vertically and the columns become the union of both
# frames' columns; cells that one frame does not have are filled with NaN.
# sort=False keeps the columns in order of first appearance.
# (DataFrame.append was removed in pandas 2.0; pd.concat replaces it.)
app2 = pd.concat([df1, df4], sort=False)
app2
In [ ]:
# Left-hand table for the merge examples: key column K plus values A and B.
left = pd.DataFrame({
    'K': ['K0', 'K1', 'K2', 'K3'],
    'A': ['A0', 'A1', 'A2', 'A3'],
    'B': ['B0', 'B1', 'B2', 'B3'],
})
left
In [ ]:
# Right-hand table for the merge examples: key column K plus values C and D.
right = pd.DataFrame({
    'K': ['K0', 'K1', 'K2', 'K3'],
    'C': ['C0', 'C1', 'C2', 'C3'],
    'D': ['D0', 'D1', 'D2', 'D3'],
})
right
In [ ]:
# With no `on=` argument, merge joins on the columns the two frames have in
# common -- here only 'K' -- so this is the same as
# pd.merge(left, right, on='K'). The default join type is 'inner'.
merg1 = left.merge(right, how='inner')
merg1
In [ ]:
# Left-hand table for the two-key merges: keys K and J, values A and B.
left2 = pd.DataFrame({
    'K': ['K0', 'K0', 'K1', 'K2'],
    'J': ['J0', 'J1', 'J0', 'J1'],
    'A': ['A0', 'A1', 'A2', 'A3'],
    'B': ['B0', 'B1', 'B2', 'B3'],
})
left2
In [ ]:
# Right-hand table for the two-key merges: keys K and J, values A and B.
# The key pair (K1, J0) appears twice.
right2 = pd.DataFrame({
    'K': ['K0', 'K1', 'K1', 'K2'],
    'J': ['J0', 'J0', 'J0', 'J0'],
    'A': ['A0', 'A1', 'A2', 'A3'],
    'B': ['B0', 'B1', 'B2', 'B3'],
})
right2
In [ ]:
# Inner join (the default `how`): keep only the (K, J) pairs found in both
# frames. Non-key columns present on both sides get the _x / _y suffixes.
mrg2 = left2.merge(right2, how='inner', on=['K', 'J'])
mrg2
In [ ]:
# Outer join: keep every (K, J) pair from either frame; the side that has
# no matching row contributes NaN.
mrg3 = left2.merge(right2, how='outer', on=['K', 'J'])
mrg3
In [ ]:
# Left join: keep every row of left2 and attach right2's values where the
# (K, J) pair matches, NaN otherwise.
mrg4 = left2.merge(right2, how='left', on=['K', 'J'])
mrg4
In [ ]:
# Right join: keep every row of right2 and attach left2's values where the
# (K, J) pair matches, NaN otherwise.
mrg5 = left2.merge(right2, how='right', on=['K', 'J'])
mrg5
In [ ]:
# Small two-column integer frame used to demonstrate adding rows.
oth_df = pd.DataFrame(dict(col1=[1, 2], col2=[2, 3]))
oth_df
In [ ]:
# .loc can enlarge the frame: label 2 is not in the index yet, so this
# assignment appends a new row holding the values 3 and 4.
oth_df.loc[2] = np.array([3,4])
oth_df
# .iloc is purely positional and cannot add rows; the line below fails with
# error: single positional indexer is out-of-bounds
# oth_df.iloc[3] = np.array([4,5])