In [1]:
import pandas as pd
In [2]:
# Load the sample table, using the first CSV column as the row index.
df = pd.read_csv('data/src/sample_pandas_normal.csv', index_col=0)

# Split by position: the first three rows go to df_A, everything after to df_B.
# Each part is copied so that later column assignments do not touch `df`.
df_A = df[:3].copy()
df_B = df[3:].copy()
In [3]:
# Show the first split (the first three rows of the loaded table) before any encoding.
print(df_A)
In [4]:
# Show the second split (rows from position 3 onward) before any encoding.
print(df_B)
In [5]:
# One-hot encode df_A on its own. For plain string columns get_dummies builds the
# dummy columns from the values present in this frame only.
print(pd.get_dummies(df_A))
In [6]:
# Same encoding applied to df_B independently; its dummy columns depend on df_B's own
# values, so they need not match the columns produced for df_A.
print(pd.get_dummies(df_B))
In [7]:
# Collect every 'state' label that occurs in either frame.
# The labels are kept as a sorted list rather than a set: iteration order of a set of
# strings changes between interpreter runs (hash randomization), and that order would
# otherwise decide the order of the dummy columns produced from these categories.
# Missing values are dropped first because NaN is not a valid category.
categories = sorted(set(df_A['state'].dropna()) | set(df_B['state'].dropna()))
print(categories)
In [8]:
# Re-type 'state' in both frames as an unordered categorical over the same shared
# label set, so each frame also knows about labels that only occur in the other one.
df_A['state'] = df_A['state'].astype(pd.CategoricalDtype(categories=categories))
df_B['state'] = df_B['state'].astype(pd.CategoricalDtype(categories=categories))
In [9]:
# Confirm the conversion took effect by printing the column's dtype.
print(df_A['state'].dtype)
In [10]:
# Encode df_A again. For a categorical column get_dummies emits one dummy column per
# declared category, including 'state' labels that do not occur in this frame.
print(pd.get_dummies(df_A))
In [11]:
# Encode df_B the same way; because 'state' shares its categories with df_A, the
# 'state' dummy columns are the same set in both results.
print(pd.get_dummies(df_B))
In [12]:
# Reload the table from disk so this example starts from unmodified string columns.
df = pd.read_csv('data/src/sample_pandas_normal.csv', index_col=0)

# Positional split: first three rows are the training part, the remainder the test part.
# Copies keep later column assignments from touching `df`.
df_train = df[:3].copy()
df_test = df[3:].copy()
In [13]:
# Take the category labels from the training frame only, in first-appearance order.
# Missing values are dropped first: NaN is not a valid category and pd.Categorical
# raises ValueError if it is passed one.
categories = df_train['state'].dropna().unique()
In [14]:
# Apply the training-derived categories to both frames. Test values outside these
# categories cannot be represented and become NaN.
df_train['state'] = df_train['state'].astype(pd.CategoricalDtype(categories=categories))
df_test['state'] = df_test['state'].astype(pd.CategoricalDtype(categories=categories))
In [15]:
# Inspect df_test after the conversion: 'state' values that are not among the
# training categories have been replaced by NaN.
print(df_test)
In [16]:
# Encode the training frame; its 'state' dummy columns follow the training categories.
print(pd.get_dummies(df_train))
In [17]:
# Encode the test frame against the same categories: it gets the same 'state' dummy
# columns as df_train, and rows whose state is NaN (unseen in training) are not
# flagged in any of them.
print(pd.get_dummies(df_test))
In [18]:
# Start over from the CSV for the all-columns version of the train/test encoding.
df = pd.read_csv('data/src/sample_pandas_normal.csv', index_col=0)

# First three rows -> training frame, remaining rows -> test frame (independent copies).
df_train = df[:3].copy()
df_test = df[3:].copy()
In [19]:
# Names of the training columns stored with object dtype; these are the columns that
# will be converted to categoricals.
cols = df_train.select_dtypes(include='object').columns
In [20]:
# Convert every selected column to a categorical in both frames, always using the
# categories observed in the training frame.
for col in cols:
    # Categories come from the training data only, so labels that appear solely in
    # the test frame become NaN there. Missing values are dropped before collecting
    # the labels because NaN is not allowed as a category (pd.Categorical raises
    # ValueError on null categories).
    categories = df_train[col].dropna().unique()
    df_train[col] = pd.Categorical(df_train[col], categories=categories)
    df_test[col] = pd.Categorical(df_test[col], categories=categories)
In [21]:
# One-hot encode both frames, replacing each with its encoded version. Their categorical
# columns share the training categories, so both get the same dummy columns from them.
df_train, df_test = (pd.get_dummies(frame) for frame in (df_train, df_test))
In [22]:
# Show the encoded training frame.
print(df_train)
In [23]:
# Show the encoded test frame for comparison with the encoded training frame.
print(df_test)