In [1]:
%run setup_env.py

Merge, join, and concatenate

Concatenating


In [2]:
df = pd.DataFrame(np.random.randn(10, 3))
display_html(df)
display_html(
    pd.concat([df[:3], df[3:7], df[7:]], axis=0),
    pd.concat([df[:3], df[3:7], df[7:]], axis=1),
    pd.concat([df[:3], df[3:7], df[7:]], axis=0, join='inner'),
    pd.concat([df[:3], df[3:7], df[7:]], axis=1, join='inner'),
    pd.concat([df[:3], df[3:7], df[7:]], keys=['first', 'second']),
)


0 1 2
0 0.550068 -0.227791 0.837940
1 0.647662 1.361502 1.545621
2 0.083181 0.174687 0.360460
3 0.053512 -1.346671 0.142603
4 -0.192309 0.628849 -0.986595
5 -1.293365 0.293792 0.035819
6 0.838352 2.484211 0.652089
7 0.181381 0.435708 -1.376555
8 0.953703 -1.504879 0.034850
9 0.443744 -0.163172 -0.672670
0 1 2
0 0.550068 -0.227791 0.837940
1 0.647662 1.361502 1.545621
2 0.083181 0.174687 0.360460
3 0.053512 -1.346671 0.142603
4 -0.192309 0.628849 -0.986595
5 -1.293365 0.293792 0.035819
6 0.838352 2.484211 0.652089
7 0.181381 0.435708 -1.376555
8 0.953703 -1.504879 0.034850
9 0.443744 -0.163172 -0.672670
0 1 2 0 1 2 0 1 2
0 0.550068 -0.227791 0.837940 NaN NaN NaN NaN NaN NaN
1 0.647662 1.361502 1.545621 NaN NaN NaN NaN NaN NaN
2 0.083181 0.174687 0.360460 NaN NaN NaN NaN NaN NaN
3 NaN NaN NaN 0.053512 -1.346671 0.142603 NaN NaN NaN
4 NaN NaN NaN -0.192309 0.628849 -0.986595 NaN NaN NaN
5 NaN NaN NaN -1.293365 0.293792 0.035819 NaN NaN NaN
6 NaN NaN NaN 0.838352 2.484211 0.652089 NaN NaN NaN
7 NaN NaN NaN NaN NaN NaN 0.181381 0.435708 -1.376555
8 NaN NaN NaN NaN NaN NaN 0.953703 -1.504879 0.034850
9 NaN NaN NaN NaN NaN NaN 0.443744 -0.163172 -0.672670
0 1 2
0 0.550068 -0.227791 0.837940
1 0.647662 1.361502 1.545621
2 0.083181 0.174687 0.360460
3 0.053512 -1.346671 0.142603
4 -0.192309 0.628849 -0.986595
5 -1.293365 0.293792 0.035819
6 0.838352 2.484211 0.652089
7 0.181381 0.435708 -1.376555
8 0.953703 -1.504879 0.034850
9 0.443744 -0.163172 -0.672670
0 1 2 0 1 2 0 1 2
0 1 2
first 0 0.550068 -0.227791 0.837940
1 0.647662 1.361502 1.545621
2 0.083181 0.174687 0.360460
second 3 0.053512 -1.346671 0.142603
4 -0.192309 0.628849 -0.986595
5 -1.293365 0.293792 0.035819
6 0.838352 2.484211 0.652089

Concatenating using append


In [3]:
df = pd.DataFrame(np.random.randn(5, 3))
display_html(
    df[:3].append(df[:2]),
    df[:3].append([df[:2], df[2:]]),
)


0 1 2
0 -0.845175 -0.509344 1.740447
1 0.779514 0.270508 -0.407723
2 1.030328 0.737203 -0.430560
0 -0.845175 -0.509344 1.740447
1 0.779514 0.270508 -0.407723
0 1 2
0 -0.845175 -0.509344 1.740447
1 0.779514 0.270508 -0.407723
2 1.030328 0.737203 -0.430560
0 -0.845175 -0.509344 1.740447
1 0.779514 0.270508 -0.407723
2 1.030328 0.737203 -0.430560
3 0.115000 0.139875 0.587525
4 -0.071168 1.455069 -0.668884

Ignoring indexes on the concatenation axis


In [4]:
df1 = pd.DataFrame(np.random.randn(3, 3), columns=['A', 'B', 'C'])
df2 = pd.DataFrame(np.random.randn(2, 3), columns=['A', 'B', 'C'])
display_html(
    df1.append(df2, ignore_index=True),
    df1.append(df2, ignore_index=False),
    pd.concat([df1, df2], ignore_index=True),
    pd.concat([df1, df2], ignore_index=False),
)


A B C
0 0.004531 -0.643150 0.873867
1 -0.515865 -2.228739 -0.798334
2 1.806679 0.552514 0.219978
3 -1.099130 -0.160105 2.204383
4 -0.987879 0.208997 -0.364813
A B C
0 0.004531 -0.643150 0.873867
1 -0.515865 -2.228739 -0.798334
2 1.806679 0.552514 0.219978
0 -1.099130 -0.160105 2.204383
1 -0.987879 0.208997 -0.364813
A B C
0 0.004531 -0.643150 0.873867
1 -0.515865 -2.228739 -0.798334
2 1.806679 0.552514 0.219978
3 -1.099130 -0.160105 2.204383
4 -0.987879 0.208997 -0.364813
A B C
0 0.004531 -0.643150 0.873867
1 -0.515865 -2.228739 -0.798334
2 1.806679 0.552514 0.219978
0 -1.099130 -0.160105 2.204383
1 -0.987879 0.208997 -0.364813

Concatenating with mixed ndims

df = pd.DataFrame(np.random.randn(4, 2), columns=['A', 'B']) s = pd.Series(np.random.randn(4), name='S') display_html( pd.concat([df, s]), pd.concat([df, s], axis=1), )

More concatenating with group keys

Appending row to a DataFrame


In [5]:
df = pd.DataFrame(np.random.randn(3, 5), columns=['A', 'B', 'C', 'D', 'E'])
s = df.xs(1)
display_html(
    df.append(s),
    df.append([{'A': 1, 'B': 0, 'C': 3, 'E': 10, 'G': -11}])
)


A B C D E
0 -0.370057 1.412544 1.476252 0.318944 -0.920840
1 -0.379257 1.957082 -1.696747 -0.003020 -0.163660
2 1.795371 0.417156 -1.710920 -1.947254 -0.590684
1 -0.379257 1.957082 -1.696747 -0.003020 -0.163660
A B C D E G
0 -0.370057 1.412544 1.476252 0.318944 -0.920840 NaN
1 -0.379257 1.957082 -1.696747 -0.003020 -0.163660 NaN
2 1.795371 0.417156 -1.710920 -1.947254 -0.590684 NaN
0 1.000000 0.000000 3.000000 NaN 10.000000 -11