In [1]:
import pandas as pd
In [2]:
# Small demo frame: 'sex' is the categorical column encoded further down;
# the remaining columns are float, integer and string filler data.
df = pd.DataFrame(
    data={
        'sex': ['M', 'F', 'M', 'F'],
        'col_2': [1.2, 3.1, 4.4, 5.5],
        'col_3': [1, 2, 3, 4],
        'col_4': ['a', 'b', 'c', 'd'],
    }
)
In [3]:
# Show the frame as constructed, with 'sex' still a single column.
df
Out[3]:
In [4]:
# Columns to one-hot encode.
categorical_variables = ['sex']

# Fill missing data with the word "Missing" so that missing values get their
# own dummy column. The result is assigned back to the column: calling
# fillna(..., inplace=True) on the df[variable] selection is chained
# assignment, which warns on pandas 2.x and does not update df under
# Copy-on-Write.
for variable in categorical_variables:
    df[variable] = df[variable].fillna("Missing")

# Replace every listed column with its dummies in a single call.
# get_dummies(columns=...) drops each encoded column and appends its
# "<variable>_<value>" indicator columns after the untouched ones, which is
# the same layout the previous concat-then-drop loop produced.
df = pd.get_dummies(df, columns=categorical_variables)
In [5]:
# Show the frame after encoding: 'sex' has been dropped and its dummy
# columns (prefixed with 'sex') are appended at the end.
df
Out[5]:
In [ ]: