In [ ]:
# Execute the shared setup script before anything else in this notebook.
# NOTE(review): the cells below use pd, np, log and display_pretty without
# importing them, so they are presumably defined by setup_env.py -- confirm.
%run setup_env.py

Working with Text Data


In [ ]:
# Sample strings with mixed case and one missing value (np.nan).
s = pd.Series([
    'A', 'B', 'C', 'Aaba', 'Baca', np.nan, 'CABA', 'dog', 'cat',
])

# Element-wise string operations exposed through the .str accessor.
lowercased = s.str.lower()
uppercased = s.str.upper()
lengths = s.str.len()

# Show the three results together.
display_pretty(lowercased, uppercased, lengths)

Splitting, Replacing, and Extracting Strings


In [ ]:
log.info('splitting')
# Underscore-delimited strings of differing lengths, plus one missing value.
s2 = pd.Series(['a_b_c', 'c_d_e', np.nan, 'f_g_h_z'])
# expand=True makes .str.split return a DataFrame with one column per piece;
# rows with fewer pieces, and the missing entry, are filled with missing
# values. This replaces the per-row .apply(pd.Series), which constructs a new
# Series object for every element.
display_pretty(s2.str.split('_', expand=True))

log.info('replacing')
# Sample strings including an empty string and a missing value.
s3 = pd.Series(['A', 'B', 'C', 'Aaba', 'Baca',
            '', np.nan, 'CABA', 'dog', 'cat'])
# The pattern is a regular expression: either "any one character followed by
# 'a' at the start of the string" or "dog", matched case-insensitively.
# regex=True is passed explicitly because pandas >= 2.0 treats the pattern as
# a literal string by default, in which case nothing here would be replaced.
display_pretty(s3.str.replace('^.a|dog', 'XX-XX ', case=False, regex=True))

log.info('extracting substrings')
# Raw string so that \d reaches the regex engine untouched; in a normal string
# literal "\d" is an invalid escape sequence and triggers a warning on current
# Python versions.
# The named groups (letter, digit) become the column names of the result, and
# entries that do not match the pattern ('c3' here) yield a row of NaN.
# expand=True is spelled out so the result is always a DataFrame.
# NOTE(review): this rebinds `s`, which held the sample Series created in the
# first example cell; the name is kept in case later cells rely on it.
s = pd.Series(['a1', 'b2', 'c3']).str.extract(
    r'(?P<letter>[ab])(?P<digit>\d)', expand=True)
display_pretty(s)