get columns from dataframe which are categorical and convert them using label encoding



In [1]:

    
import pandas as pd
from sklearn.preprocessing import LabelEncoder



In [2]:

    
df = pd.DataFrame({'col_1': [1, 0, 1, None], 
                   'col_2': [1.2, 3.1, 4.4, 5.5], 
                   'col_3': [1, 2, 3, 4], 
                   'col_4': ['a', 'b', 'c', 'd']})



In [3]:

    
df



In [4]:

    
df.info()









    



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 4 columns):
col_1    3 non-null float64
col_2    4 non-null float64
col_3    4 non-null int64
col_4    4 non-null object
dtypes: float64(2), int64(1), object(1)
memory usage: 208.0+ bytes



In [5]:

    
bool_cols = [col for col in df if len(df[[col]].dropna()[col].unique()) == 2]



In [6]:

    
for col in bool_cols:
    label = LabelEncoder()
    label.fit(list(df[col].values.astype("str")))
    df[col] = label.transform(list(df[col].values.astype("str")))



In [7]:

    
df.info()









    



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 4 entries, 0 to 3
Data columns (total 4 columns):
col_1    4 non-null int64
col_2    4 non-null float64
col_3    4 non-null int64
col_4    4 non-null object
dtypes: float64(1), int64(2), object(1)
memory usage: 208.0+ bytes



In [8]:

    
df



In [ ]: