Convert categorical columns to one-hot encoded columns

In [1]:
import pandas as pd

In [2]:
# Small demo frame: one categorical column ('sex') to encode, plus a float,
# an integer and a string column that are left untouched.
df = pd.DataFrame(
    {
        'sex': ['M', 'F', 'M', 'F'],
        'col_2': [1.2, 3.1, 4.4, 5.5],
        'col_3': [1, 2, 3, 4],
        'col_4': ['a', 'b', 'c', 'd'],
    }
)

In [3]:
# Show the frame before encoding; 'sex' is still a single column of labels.
df


Out[3]:
col_2 col_3 col_4 sex
0 1.2 1 a M
1 3.1 2 b F
2 4.4 3 c M
3 5.5 4 d F

In [4]:
categorical_variables = ['sex']

# One-hot encode every column listed in `categorical_variables` in one pass.
#
# Missing values are first replaced with the literal label "Missing" so they
# get their own indicator column. The fill uses DataFrame.fillna with a
# per-column mapping and returns a new frame: calling
# `df[variable].fillna(..., inplace=True)` instead would operate on an
# intermediate Series, which pandas warns about and which does not update
# `df` when Copy-on-Write is enabled.
#
# pd.get_dummies(..., columns=...) then removes each listed column and appends
# its indicator columns, prefixed with the column name, after the remaining
# columns.
#
# NOTE: `df` is rebound to the encoded frame, so re-running this cell without
# re-creating `df` raises a KeyError because the source columns are gone.
df = pd.get_dummies(
    df.fillna({variable: "Missing" for variable in categorical_variables}),
    columns=categorical_variables,
)

In [5]:
# Show the encoded frame; 'sex' has been replaced by the sex_F / sex_M indicator columns.
df


Out[5]:
col_2 col_3 col_4 sex_F sex_M
0 1.2 1 a 0.0 1.0
1 3.1 2 b 1.0 0.0
2 4.4 3 c 0.0 1.0
3 5.5 4 d 1.0 0.0

In [ ]: