In [1]:
import pandas as pd
In [2]:
# Small demo frame: a categorical 'groups' column whose labels occur with
# different frequencies, paired with a running integer in 'vals'.
df = pd.DataFrame(
    {
        'groups': [
            'group 1', 'group 2', 'group 1', 'group 2', 'group 3',
            'group 4', 'group 5', 'group 1', 'group 2', 'group 5',
        ],
        'vals': list(range(1, 11)),
    }
)
In [3]:
# Show the frame as constructed, before any labels in 'groups' are replaced.
df
Out[3]:
In [4]:
# Number of rows per distinct label in 'groups'.
df['groups'].value_counts()
Out[4]:
In [5]:
# Collapse infrequent labels: in each listed column, every value that occurs
# at most twice is overwritten in place with the single label 'other'.
high_dim_columns = ['groups']
for column in high_dim_columns:
    # Occurrences of each distinct value, indexed by the value itself.
    counts = df[column].value_counts()
    # Select the rare labels by masking the counts Series directly.
    # Looking the mask up by column name (after wrapping it in a DataFrame)
    # relies on value_counts() naming its result after the source column,
    # which is no longer true in pandas >= 2.0 (the result is named 'count')
    # and raises KeyError there.
    rare_values = counts.index[counts <= 2]
    # .loc with a boolean row mask assigns on df itself (no chained indexing).
    df.loc[df[column].isin(rare_values), column] = 'other'
In [6]:
# Show the frame again to inspect the 'groups' column after the replacement.
df
Out[6]:
In [ ]: