Pandas regex filtering



In [36]:

    
import pandas as pd
import re



In [37]:

    
# Three families of column names, six names each.
# The PAY_ family is numbered 0, 2, 3, 4, 5, 6 (there is no PAY_1), whereas
# the BILL_AMT and PAY_AMT families are numbered 1 through 6.
cols = (
    [f"PAY_{i}" for i in (0, 2, 3, 4, 5, 6)]
    + [f"BILL_AMT{i}" for i in range(1, 7)]
    + [f"PAY_AMT{i}" for i in range(1, 7)]
)



In [38]:

    
# Throwaway frame used only to exercise the regex-based column selection:
# one column per name in `cols`, every column holding the values 1, 2, 3.
df = pd.DataFrame(dict.fromkeys(cols, [1, 2, 3]))



In [39]:

    
# Anchored pattern: the whole name must be "PAY_" followed by one or more
# digits, so PAY_0 ... PAY_6 match while PAY_AMT1 and BILL_AMT1 do not.
regex_string = "^PAY_[0-9]+$"



In [40]:

    
# pandas-native route: keep only the columns whose label matches the pattern.
# The axis is spelled out for readability; "columns" is what DataFrame.filter
# uses by default.
df.filter(regex=regex_string, axis="columns")



In [43]:

    
# Plain-Python route: compile the pattern once, then test each column label.
regex = re.compile(regex_string)
# filter() does not return a list: it returns a lazy, single-use iterator
# (a `filter` object). Labels for which regex.match() returns None are dropped.
# Once the iterator has been consumed it yields nothing more.
columns_of_interest = filter(regex.match, df.columns)
# Show the type to make the point that this is not yet a list.
type(columns_of_interest)









    Out[43]:





filter



In [42]:

    
# Materialise the matches into a concrete list.
# `columns_of_interest` is a one-shot iterator, so this consumes it: running
# this cell again without first re-creating the iterator yields an empty list.
columns_of_interest_list = [*columns_of_interest]
columns_of_interest_list









    Out[42]:





['PAY_0', 'PAY_2', 'PAY_3', 'PAY_4', 'PAY_5', 'PAY_6']

Pandas regex filtering

References