Get the categorical columns of a DataFrame (here: columns with exactly two distinct non-null values) and convert them to integer labels using label encoding.

In [1]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

In [2]:
# Small demo frame used throughout the notebook.
df = pd.DataFrame(
    {
        "col_1": [1, 0, 1, None],        # two distinct values plus one missing entry
        "col_2": [1.2, 3.1, 4.4, 5.5],   # floats, all distinct
        "col_3": [1, 2, 3, 4],           # integers, all distinct
        "col_4": ["a", "b", "c", "d"],   # strings, all distinct
    }
)

In [3]:
# Show the frame before any encoding (it is only four rows, so display it whole).
df


Out[3]:
col_1 col_2 col_3 col_4
0 1.0 1.2 1 a
1 0.0 3.1 2 b
2 1.0 4.4 3 c
3 NaN 5.5 4 d

In [4]:
# Inspect dtypes and non-null counts before encoding.
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 4 columns):
col_1    3 non-null float64
col_2    4 non-null float64
col_3    4 non-null int64
col_4    4 non-null object
dtypes: float64(2), int64(1), object(1)
memory usage: 208.0+ bytes

In [5]:
# Pick the columns to label-encode: those with exactly two distinct
# non-missing values. Series.nunique() ignores NaN by default, so a column
# such as col_1 (1, 0, 1, NaN) still counts as two-valued.
bool_cols = [col for col in df.columns if df[col].nunique() == 2]

In [6]:
# Replace each selected column with integer class labels, in place.
# NOTE(review): values are converted to strings before encoding, so a missing
# value becomes the text "nan" and is assigned its own label rather than
# staying NaN -- confirm that this is the intended treatment of missing data.
for col in bool_cols:
    label = LabelEncoder()
    # Stringify once and reuse the result for both learning the classes and
    # encoding; fit_transform does both steps in a single call.
    values_as_str = df[col].values.astype("str")
    df[col] = label.fit_transform(values_as_str)

In [7]:
# Re-check dtypes and non-null counts after encoding.
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 4 columns):
col_1    4 non-null int64
col_2    4 non-null float64
col_3    4 non-null int64
col_4    4 non-null object
dtypes: float64(1), int64(2), object(1)
memory usage: 208.0+ bytes

In [8]:
# Show the frame after encoding; only the columns in bool_cols were rewritten.
df


Out[8]:
col_1 col_2 col_3 col_4
0 1 1.2 1 a
1 0 3.1 2 b
2 1 4.4 3 c
3 2 5.5 4 d

In [ ]: