https://groups.google.com/forum/#!topic/python-etl/XRIJovpb6Qc
The conversion I'm trying to make is:
In [1]:
import datetime
import petl.interactive as etl
print etl.__version__
In [2]:
data = """user_id,date_begin,date_end,nb_days,notes,projet_id
user1,2014-07-31,2014-08-07,5.5,5 days + am,cp
user2,2014-07-31,2014-08-07,5.5,5 days + pm,cp
user3,2014-07-31,2014-08-06,5,5 days,cp
"""
In [3]:
day = datetime.timedelta(days=1)
def split_partial_days(row):
if isinstance(row.nb_days, float):
# split out partial days into separate row
if 'am' in row.notes:
# full days
yield (row.user_id,
row.date_begin,
row.date_end - day,
int(row.nb_days),
row.notes.split('+')[0].strip(),
row.projet_id)
# partial days
yield (row.user_id,
row.date_end,
row.date_end,
row.nb_days - int(row.nb_days),
row.notes.split('+')[1].strip(),
row.projet_id)
if 'pm' in row.notes:
# partial days
yield (row.user_id,
row.date_begin,
row.date_begin,
row.nb_days - int(row.nb_days),
row.notes.split('+')[1].strip(),
row.projet_id)
# full days
yield (row.user_id,
row.date_begin + day,
row.date_end,
int(row.nb_days),
row.notes.split('+')[0].strip(),
row.projet_id)
else:
# do nothing
yield row
tbl = (etl
.fromcsv(etl.StringSource(data))
.convert(('date_begin', 'date_end'), etl.dateparser('%Y-%m-%d'))
.convert('nb_days', etl.parsenumber)
.rowmapmany(split_partial_days, fields=['user_id', 'date_begin', 'date_end', 'nb_days', 'notes', 'projet_id'])
)
tbl
Out[3]:
In [3]: