In [36]:
import pandas as pd
import re
In [37]:
# Column labels used to build the toy frame, in three families:
#   PAY_<n>      -> suffixes 0, 2, 3, 4, 5, 6 (there is no PAY_1 in this list)
#   BILL_AMT<n>  -> suffixes 1 through 6
#   PAY_AMT<n>   -> suffixes 1 through 6
cols = [
    *(f"PAY_{suffix}" for suffix in (0, 2, 3, 4, 5, 6)),
    *(f"BILL_AMT{suffix}" for suffix in range(1, 7)),
    *(f"PAY_AMT{suffix}" for suffix in range(1, 7)),
]
In [38]:
# Toy frame for trying out the regex: one column per label in `cols`,
# each holding the same three placeholder rows (1, 2, 3).
df = pd.DataFrame(dict.fromkeys(cols, [1, 2, 3]))
In [39]:
# Anchored pattern: the whole label must be "PAY_" followed by one or more digits.
# This matches PAY_0 ... PAY_6 but not PAY_AMT1 or BILL_AMT1.
regex_string = r"^PAY_[0-9]+$"
In [40]:
# Keep only the columns whose label matches the pattern. DataFrame.filter
# selects on labels, not on cell values; the axis is spelled out for clarity.
df.filter(regex=regex_string, axis="columns")
Out[40]:
In [43]:
# Pre-compile the pattern so its bound `match` method can be used as the predicate.
regex = re.compile(regex_string)
# The built-in filter() is lazy: it returns a one-shot iterator (a `filter`
# object), not a list, so it can only be consumed once.
columns_of_interest = filter(regex.match, df.columns)
# Show the type of the object filter() returned.
type(columns_of_interest)
Out[43]:
In [42]:
# Materialise the matching column labels as a list.
# A fresh filter() is created here instead of draining the existing
# `columns_of_interest` iterator: a filter object is exhausted after one pass,
# so list(columns_of_interest) would return [] if this cell were run again.
columns_of_interest_list = list(filter(regex.match, df.columns))
columns_of_interest_list
Out[42]: