Merge two DataFrames and split them again. This is useful for combining train and test data into a single panel, applying transformations to the whole panel in one go, and finally splitting the panel back into separate train and test DataFrames.

In [1]:
import pandas as pd
import numpy as np

In [2]:
# Column values for the first frame: two integer series of equal length,
# keyed by the column names they will get.
ts1, ts2 = [1, 2, 3, 4], [6, 7, 8, 9]
d = {
    'col_1': ts1,
    'col_2': ts2,
}

In [3]:
# Echo the dict so its contents can be checked before building a frame.
d


Out[3]:
{'col_1': [1, 2, 3, 4], 'col_2': [6, 7, 8, 9]}

In [4]:
# Build the first frame from the dict: each key becomes a column name and
# each list becomes that column's values.
df_1 = pd.DataFrame(d)

In [5]:
# Display the first frame.
df_1


Out[5]:
col_1 col_2
0 1 6
1 2 7
2 3 8
3 4 9

In [6]:
# Second frame: a 3x2 block of standard-normal samples under the column
# names col_1 and col_2. No seed is set, so the values differ on every run.
df_2 = pd.DataFrame(
    data=np.random.randn(3, 2),
    columns=['col_1', 'col_2'],
)

In [7]:
# Display the second frame.
df_2


Out[7]:
col_1 col_2
0 0.654547 -1.201099
1 -0.088006 -0.049599
2 0.609881 -1.003260

In [8]:
# Stack the two frames row-wise (concat's default axis) into one panel.
# ignore_index=True discards both original indexes and renumbers the rows
# 0..n-1, so the rows of df_1 come first, followed by the rows of df_2.
df_all = pd.concat([df_1, df_2], ignore_index=True)

In [9]:
# Display the combined panel.
df_all


Out[9]:
col_1 col_2
0 1.000000 6.000000
1 2.000000 7.000000
2 3.000000 8.000000
3 4.000000 9.000000
4 0.654547 -1.201099
5 -0.088006 -0.049599
6 0.609881 -1.003260

In [10]:
# Report (rows, columns) for each input frame and for the combined panel;
# the panel's row count should equal the sum of the two inputs.
for frame in (df_1, df_2, df_all):
    print(frame.shape)


(4, 2)
(3, 2)
(7, 2)

In [11]:
# Split the panel back into its two parts. The first len(df_1) rows of
# df_all came from df_1 and the remaining rows came from df_2.
n_train = df_1.shape[0]

# .iloc makes the positional slicing explicit, and .copy() makes each part
# an independent frame. Later in-place changes to df_train / df_test then
# neither touch df_all nor trigger SettingWithCopyWarning.
# df_test keeps its row labels from the panel (they start at n_train).
df_train = df_all.iloc[:n_train].copy()
df_test = df_all.iloc[n_train:].copy()

In [12]:
# Report (rows, columns) for both split parts and for the panel, to confirm
# that the split row counts add up to the panel's row count.
for frame in (df_train, df_test, df_all):
    print(frame.shape)


(4, 2)
(3, 2)
(7, 2)

In [ ]: